diff --git a/_README.ipynb b/_README.ipynb index 75867ff..14add59 100644 --- a/_README.ipynb +++ b/_README.ipynb @@ -17,6 +17,48 @@ " - ```_README.md```*-----说明文档*\n", " - ```app_spec.yml```*-----定义项目的输入输出,为部署服务*\n", " - ```coding_here.ipynb```*-----输入并运行代码*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def handle(conf):\n", + " \"\"\"\n", + " 该方法是部署之后,其他人调用你的服务时候的处理方法。\n", + " 请按规范填写参数结构,这样我们就能替你自动生成配置文件,方便其他人的调用。\n", + " 范例:\n", + " params['key'] = value # value_type: str # description: some description\n", + " value_type 可以选择:img, video, audio, str, int, float, [int], [str], [float]\n", + " 参数请放到params字典中,我们会自动解析该变量。\n", + " \"\"\"\n", + "\n", + " param1 = conf['param1'] # value_type: str # description: some description\n", + " # add your code\n", + " return {'ret1': 'cat'}\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/home/jovyan/work'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pwd" ] }, { @@ -121,7 +163,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -135,17 +177,17 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" - }, + "version": "3.7.5" + }, "pycharm": { "stem_cell": { "cell_type": "raw", - "source": [], "metadata": { "collapsed": false - } - } - } + }, + "source": [] + } + } }, "nbformat": 4, "nbformat_minor": 2 diff --git a/app_spec.yml b/app_spec.yml new file mode 100644 index 0000000..3635ae6 --- /dev/null +++ b/app_spec.yml @@ -0,0 +1,10 @@ +input: + Photo: + name: Photo + value_type: img + description: 请传入128*128的图片 +output: + Output: + name: Output + value_type: img + description: '' diff --git a/etc/.ipynb_checkpoints/dev-indices-checkpoint.txt 
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
tf.reset_default_graph()
import numpy as np
from PIL import Image
import model
import util
import os
import sys

# Checkpoint prefix restored on every service call.
model_PATH = '/home/jovyan/work/src/output/models/model2000.ckpt'

# Side length (pixels) of the square generator input. The original module
# used IMAGE_SZ without defining it (NameError on the first call). Prefer the
# value exported by the model module when there is one; otherwise fall back to
# 128, which matches the "128x128" banner the model prints and the input size
# requested in app_spec.yml.
# NOTE(review): confirm that `model` exposes IMAGE_SZ.
IMAGE_SZ = getattr(model, 'IMAGE_SZ', 128)


def load_demo_image(in_PATH):
    """Load an image and turn it into the generator's input batch.

    Args:
        in_PATH: Anything ``PIL.Image.open`` accepts (a filesystem path or a
            binary file object).

    Returns:
        Whatever ``util.preprocess_images_outpainting`` produces for a batch
        of one RGB image scaled to the [0, 1] range.
    """
    # The context manager closes the underlying file as soon as the pixels
    # have been copied into the array.
    with Image.open(in_PATH) as im:
        img = np.array(im.convert('RGB'))[np.newaxis] / 255.0
    return util.preprocess_images_outpainting(img)


def inference(model_PATH, img_p):
    """Run the generator on a preprocessed batch and return a PIL image.

    Args:
        model_PATH: Checkpoint prefix handed to ``tf.train.Saver.restore``.
        img_p: Batch fed to the ``G_Z`` placeholder, i.e. an array of shape
            ``[batch, IMAGE_SZ, IMAGE_SZ, 4]``.

    Returns:
        ``PIL.Image.Image`` in RGB mode built from the first generated sample.
    """
    # Build the network in a private graph: this function runs once per
    # service request, and building into the shared default graph would add a
    # new placeholder and a new set of saver ops on every call.
    with tf.Graph().as_default():
        G_Z = tf.placeholder(
            tf.float32, shape=[None, IMAGE_SZ, IMAGE_SZ, 4], name='G_Z')
        G_sample = model.generator(G_Z)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            saver.restore(sess, model_PATH)
            output, = sess.run([G_sample], feed_dict={G_Z: img_p})

    # Scale the first sample back to 8-bit pixel values.
    img_norm = (output[0] * 255.0).astype(np.uint8)
    return Image.fromarray(img_norm, 'RGB')


def handle(conf):
    """Service entry point invoked when a deployed app receives a request.

    The hosting platform reads the parameter structure to generate the
    service configuration automatically, so parameters should follow its
    convention, e.g.::

        params['key'] = value  # value_type: str  # description: some description

    ``value_type`` may be one of: img, video, audio, str, int, float, [int],
    [str], [float].

    Args:
        conf: Request parameters. ``conf['Photo']`` is the input image;
            app_spec.yml asks callers for a 128*128 picture.
            NOTE(review): the original code named this value ``base64_str``,
            yet it is passed straight to ``PIL.Image.open``, which needs a
            path or file object -- confirm what the platform actually sends.

    Returns:
        ``{'Output': <PIL.Image.Image>}`` holding the outpainted image.
    """
    photo = conf['Photo']
    # The original passed two undefined extra arguments (image_size, device)
    # and handed the `model` module to inference() in place of the
    # checkpoint path.
    image = load_demo_image(photo)
    res = inference(model_PATH, image)
    return {'Output': res}
+2022-06-24T15:06:07.897430232Z 2022-06-24 23:06:07.895520: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-24T15:06:09.566498086Z Imported model (for Places365, 128x128 images) +2022-06-24T15:06:12.24384624Z Traceback (most recent call last): +2022-06-24T15:06:12.243895488Z File "src/train.py", line 15, in +2022-06-24T15:06:12.263438511Z tf.reset_default_graph() +2022-06-24T15:06:12.263468552Z AttributeError: module 'tensorflow' has no attribute 'reset_default_graph' +2022-06-24T15:06:12.683968769Z SYSTEM: Finishing... +2022-06-24T15:06:12.897833055Z SYSTEM: Error Exists! diff --git a/job_logs/job-gpu-62b5d344b5c4eec184cc05b0.log b/job_logs/job-gpu-62b5d344b5c4eec184cc05b0.log new file mode 100644 index 0000000..1aae42c --- /dev/null +++ b/job_logs/job-gpu-62b5d344b5c4eec184cc05b0.log @@ -0,0 +1,10 @@ +2022-06-24T15:07:54.304456417Z SYSTEM: Preparing env... +2022-06-24T15:07:54.807933369Z SYSTEM: Running... +2022-06-24T15:07:55.517547472Z 2022-06-24 23:07:55.513238: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-24T15:07:57.101646267Z Imported model (for Places365, 128x128 images) +2022-06-24T15:07:59.609309108Z Traceback (most recent call last): +2022-06-24T15:07:59.609359562Z File "src/train.py", line 64, in +2022-06-24T15:07:59.614462932Z G_Z = tf.placeholder(tf.float32, shape=[None, IMAGE_SZ, IMAGE_SZ, 4], name='G_Z') +2022-06-24T15:07:59.614484248Z AttributeError: module 'tensorflow' has no attribute 'placeholder' +2022-06-24T15:08:00.047025825Z SYSTEM: Finishing... +2022-06-24T15:08:00.254692017Z SYSTEM: Error Exists! 
diff --git a/job_logs/job-gpu-62b5d39fd4e7f8c811b53eb1.log b/job_logs/job-gpu-62b5d39fd4e7f8c811b53eb1.log new file mode 100644 index 0000000..a5bbc26 --- /dev/null +++ b/job_logs/job-gpu-62b5d39fd4e7f8c811b53eb1.log @@ -0,0 +1,12 @@ +2022-06-24T15:09:25.204980934Z SYSTEM: Preparing env... +2022-06-24T15:09:25.719685215Z SYSTEM: Running... +2022-06-24T15:09:26.429518202Z 2022-06-24 23:09:26.428748: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-24T15:09:28.084842936Z Imported model (for Places365, 128x128 images) +2022-06-24T15:09:30.663258107Z Traceback (most recent call last): +2022-06-24T15:09:30.663303708Z File "src/train.py", line 64, in +2022-06-24T15:09:30.663549466Z G_Z = tf.placeholder(tf.float32, shape=[None, IMAGE_SZ, IMAGE_SZ, 4], name='G_Z') +2022-06-24T15:09:30.664301035Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py", line 3097, in placeholder +2022-06-24T15:09:30.687425674Z raise RuntimeError("tf.placeholder() is not compatible with " +2022-06-24T15:09:30.687454335Z RuntimeError: tf.placeholder() is not compatible with eager execution. +2022-06-24T15:09:31.089543597Z SYSTEM: Finishing... +2022-06-24T15:09:31.296093256Z SYSTEM: Error Exists! diff --git a/job_logs/job-gpu-62b5d4052a85ae797c345e17.log b/job_logs/job-gpu-62b5d4052a85ae797c345e17.log new file mode 100644 index 0000000..eac12d0 --- /dev/null +++ b/job_logs/job-gpu-62b5d4052a85ae797c345e17.log @@ -0,0 +1,10 @@ +2022-06-24T15:11:08.102681479Z SYSTEM: Preparing env... +2022-06-24T15:11:08.698452928Z SYSTEM: Running... 
+2022-06-24T15:11:09.445497917Z 2022-06-24 23:11:09.437930: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-24T15:11:11.078881179Z Imported model (for Places365, 128x128 images) +2022-06-24T15:11:13.662043015Z Traceback (most recent call last): +2022-06-24T15:11:13.662088455Z File "src/train.py", line 64, in +2022-06-24T15:11:13.667490013Z reset_graph() +2022-06-24T15:11:13.667534099Z NameError: name 'reset_graph' is not defined +2022-06-24T15:11:14.148404869Z SYSTEM: Finishing... +2022-06-24T15:11:14.346444169Z SYSTEM: Error Exists! diff --git a/job_logs/job-gpu-62b5d43bc06b81cd38279613.log b/job_logs/job-gpu-62b5d43bc06b81cd38279613.log new file mode 100644 index 0000000..6ec79da --- /dev/null +++ b/job_logs/job-gpu-62b5d43bc06b81cd38279613.log @@ -0,0 +1,12 @@ +2022-06-24T15:12:01.617089532Z SYSTEM: Preparing env... +2022-06-24T15:12:02.203109161Z SYSTEM: Running... +2022-06-24T15:12:03.004629571Z 2022-06-24 23:12:03.003884: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-24T15:12:04.639278285Z Imported model (for Places365, 128x128 images) +2022-06-24T15:12:07.282805014Z Traceback (most recent call last): +2022-06-24T15:12:07.282856749Z File "src/train.py", line 71, in +2022-06-24T15:12:07.283554986Z data = np.load('places/places_128.npz') +2022-06-24T15:12:07.283591663Z File "/usr/local/lib/python3.7/dist-packages/numpy/lib/npyio.py", line 428, in load +2022-06-24T15:12:07.299470153Z fid = open(os_fspath(file), "rb") +2022-06-24T15:12:07.299489677Z FileNotFoundError: [Errno 2] No such file or directory: 'places/places_128.npz' +2022-06-24T15:12:07.850426625Z SYSTEM: Finishing... +2022-06-24T15:12:08.075164736Z SYSTEM: Error Exists! 
diff --git a/job_logs/job-gpu-62b94f70c584fdf74ee42dd3.log b/job_logs/job-gpu-62b94f70c584fdf74ee42dd3.log new file mode 100644 index 0000000..d7e9e9a --- /dev/null +++ b/job_logs/job-gpu-62b94f70c584fdf74ee42dd3.log @@ -0,0 +1,9 @@ +2022-06-27T06:34:35.306224872Z SYSTEM: Preparing env... +2022-06-27T06:34:35.980785521Z SYSTEM: Running... +2022-06-27T06:34:36.741505339Z 2022-06-27 14:34:36.738257: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:34:38.457388991Z WARNING:tensorflow:From /home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. +2022-06-27T06:34:38.457442694Z Instructions for updating: +2022-06-27T06:34:38.457449025Z non-resource variables are not supported in the long term +2022-06-27T06:34:38.468014128Z Imported model (for Places365, 128x128 images) +2022-06-27T06:34:43.13214278Z Usage: python test.py [model_PATH] [in_PATH] [out_PATH] +2022-06-27T06:34:43.648430148Z SYSTEM: Finishing... diff --git a/job_logs/job-gpu-62b94f9cb17f87f3a6d7445b.log b/job_logs/job-gpu-62b94f9cb17f87f3a6d7445b.log new file mode 100644 index 0000000..45ca465 --- /dev/null +++ b/job_logs/job-gpu-62b94f9cb17f87f3a6d7445b.log @@ -0,0 +1,14 @@ +2022-06-27T06:35:15.051483054Z SYSTEM: Preparing env... +2022-06-27T06:35:15.65442522Z SYSTEM: Running... +2022-06-27T06:35:16.362517265Z 2022-06-27 14:35:16.361256: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:35:18.024936822Z WARNING:tensorflow:From /home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. 
+2022-06-27T06:35:18.024986723Z Instructions for updating: +2022-06-27T06:35:18.024991459Z non-resource variables are not supported in the long term +2022-06-27T06:35:18.029401275Z Imported model (for Places365, 128x128 images) +2022-06-27T06:35:20.683442548Z Traceback (most recent call last): +2022-06-27T06:35:20.683486794Z File "src/test.py", line 28, in +2022-06-27T06:35:20.68361951Z G_sample = model.generator(G_Z) +2022-06-27T06:35:20.683632773Z File "/home/jovyan/work/src/model.py", line 12, in generator +2022-06-27T06:35:20.69064768Z with tf.variable_scope('G', reuse=tf.AUTO_REUSE): +2022-06-27T06:35:20.690694567Z AttributeError: module 'tensorflow' has no attribute 'variable_scope' +2022-06-27T06:35:21.152841351Z SYSTEM: Finishing... diff --git a/job_logs/job-gpu-62b94fd2f752e3e25d1d3e30.log b/job_logs/job-gpu-62b94fd2f752e3e25d1d3e30.log new file mode 100644 index 0000000..d5132d7 --- /dev/null +++ b/job_logs/job-gpu-62b94fd2f752e3e25d1d3e30.log @@ -0,0 +1,128 @@ +2022-06-27T06:36:09.110278599Z SYSTEM: Preparing env... +2022-06-27T06:36:09.755697526Z SYSTEM: Running... +2022-06-27T06:36:10.582438824Z 2022-06-27 14:36:10.578599: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:36:12.27114199Z WARNING:tensorflow:From /home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. +2022-06-27T06:36:12.27118886Z Instructions for updating: +2022-06-27T06:36:12.271196741Z non-resource variables are not supported in the long term +2022-06-27T06:36:12.27588787Z Imported model (for Places365, 128x128 images) +2022-06-27T06:36:15.127571182Z WARNING:tensorflow:From /home/jovyan/work/src/model.py:20: conv2d (from tensorflow.python.keras.legacy_tf_layers.convolutional) is deprecated and will be removed in a future version. 
+2022-06-27T06:36:15.127625625Z Instructions for updating: +2022-06-27T06:36:15.127635424Z Use `tf.keras.layers.Conv2D` instead. +2022-06-27T06:36:15.127641681Z WARNING:tensorflow:From /home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/keras/legacy_tf_layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer_v1) is deprecated and will be removed in a future version. +2022-06-27T06:36:15.127654313Z Instructions for updating: +2022-06-27T06:36:15.127659052Z Please use `layer.__call__` method instead. +2022-06-27T06:36:15.256535403Z WARNING:tensorflow:From /home/jovyan/work/src/model.py:79: conv2d_transpose (from tensorflow.python.keras.legacy_tf_layers.convolutional) is deprecated and will be removed in a future version. +2022-06-27T06:36:15.256581182Z Instructions for updating: +2022-06-27T06:36:15.256588253Z Use `tf.keras.layers.Conv2DTranspose` instead. +2022-06-27T06:36:15.327579181Z 2022-06-27 14:36:15.326484: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1 +2022-06-27T06:36:15.413601745Z 2022-06-27 14:36:15.410378: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:36:15.413646851Z 2022-06-27 14:36:15.411277: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: +2022-06-27T06:36:15.413654172Z pciBusID: 0000:00:08.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0 +2022-06-27T06:36:15.413657624Z coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 7.75GiB deviceMemoryBandwidth: 681.88GiB/s +2022-06-27T06:36:15.413660889Z 2022-06-27 14:36:15.411345: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:36:15.424689012Z 2022-06-27 14:36:15.423389: I 
tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T06:36:15.432079412Z 2022-06-27 14:36:15.431364: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10 +2022-06-27T06:36:15.496414752Z 2022-06-27 14:36:15.493736: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10 +2022-06-27T06:36:15.593235058Z 2022-06-27 14:36:15.591720: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10 +2022-06-27T06:36:15.599321627Z 2022-06-27 14:36:15.596773: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10 +2022-06-27T06:36:15.805424915Z 2022-06-27 14:36:15.801512: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7 +2022-06-27T06:36:15.805479188Z 2022-06-27 14:36:15.801981: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:36:15.805486999Z 2022-06-27 14:36:15.803112: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:36:15.805492776Z 2022-06-27 14:36:15.804002: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0 +2022-06-27T06:36:15.805497988Z 2022-06-27 14:36:15.804847: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA +2022-06-27T06:36:15.805503237Z To enable them in other operations, 
rebuild TensorFlow with the appropriate compiler flags. +2022-06-27T06:36:15.81417475Z 2022-06-27 14:36:15.813899: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 2499990000 Hz +2022-06-27T06:36:15.814946288Z 2022-06-27 14:36:15.814691: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5547d70 initialized for platform Host (this does not guarantee that XLA will be used). Devices: +2022-06-27T06:36:15.814965987Z 2022-06-27 14:36:15.814721: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2022-06-27T06:36:15.978500134Z 2022-06-27 14:36:15.972620: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:36:15.978540247Z 2022-06-27 14:36:15.973668: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4916460 initialized for platform CUDA (this does not guarantee that XLA will be used). 
Devices: +2022-06-27T06:36:15.978547933Z 2022-06-27 14:36:15.973703: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0 +2022-06-27T06:36:15.978553148Z 2022-06-27 14:36:15.974073: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:36:15.978558435Z 2022-06-27 14:36:15.974960: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: +2022-06-27T06:36:15.978579396Z pciBusID: 0000:00:08.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0 +2022-06-27T06:36:15.978583413Z coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 7.75GiB deviceMemoryBandwidth: 681.88GiB/s +2022-06-27T06:36:15.978586337Z 2022-06-27 14:36:15.974996: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:36:15.978589107Z 2022-06-27 14:36:15.975027: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T06:36:15.97859187Z 2022-06-27 14:36:15.975052: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10 +2022-06-27T06:36:15.978594744Z 2022-06-27 14:36:15.975075: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10 +2022-06-27T06:36:15.978597775Z 2022-06-27 14:36:15.975098: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10 +2022-06-27T06:36:15.978600631Z 2022-06-27 14:36:15.975121: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10 +2022-06-27T06:36:15.978604493Z 2022-06-27 14:36:15.975144: I 
tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7 +2022-06-27T06:36:15.978607328Z 2022-06-27 14:36:15.975351: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:36:15.978610359Z 2022-06-27 14:36:15.976383: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:36:15.978613279Z 2022-06-27 14:36:15.977323: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0 +2022-06-27T06:36:15.978616007Z 2022-06-27 14:36:15.977373: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:36:16.61051414Z 2022-06-27 14:36:16.609849: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix: +2022-06-27T06:36:16.610556469Z 2022-06-27 14:36:16.609912: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263] 0 +2022-06-27T06:36:16.61056136Z 2022-06-27 14:36:16.609925: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0: N +2022-06-27T06:36:16.621638578Z 2022-06-27 14:36:16.618766: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:36:16.621703314Z 2022-06-27 14:36:16.619824: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:36:16.621711487Z 2022-06-27 14:36:16.620774: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created 
TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 7116 MB memory) -> physical GPU (device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:08.0, compute capability: 6.0) +2022-06-27T06:36:16.659863984Z 2022-06-27 14:36:16.659585: W tensorflow/core/util/tensor_slice_reader.cc:95] Could not open /home/jovyan/work/src/output/models: Failed precondition: /home/jovyan/work/src/output/models; Is a directory: perhaps your file is in a different file format and you need to use a different restore operator? +2022-06-27T06:36:16.661458563Z 2022-06-27 14:36:16.661189: W tensorflow/core/util/tensor_slice_reader.cc:95] Could not open /home/jovyan/work/src/output/models: Failed precondition: /home/jovyan/work/src/output/models; Is a directory: perhaps your file is in a different file format and you need to use a different restore operator? +2022-06-27T06:36:16.66147741Z 2022-06-27 14:36:16.661256: W tensorflow/core/framework/op_kernel.cc:1767] OP_REQUIRES failed at save_restore_tensor.cc:182 : Data loss: Unable to open table file /home/jovyan/work/src/output/models: Failed precondition: /home/jovyan/work/src/output/models; Is a directory: perhaps your file is in a different file format and you need to use a different restore operator? 
+2022-06-27T06:36:16.858577056Z Traceback (most recent call last): +2022-06-27T06:36:16.858614223Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1365, in _do_call +2022-06-27T06:36:16.860723274Z return fn(*args) +2022-06-27T06:36:16.860770309Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1350, in _run_fn +2022-06-27T06:36:16.860778361Z target_list, run_metadata) +2022-06-27T06:36:16.860783251Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1443, in _call_tf_sessionrun +2022-06-27T06:36:16.860788557Z run_metadata) +2022-06-27T06:36:16.860793314Z tensorflow.python.framework.errors_impl.DataLossError: 2 root error(s) found. +2022-06-27T06:36:16.860798308Z (0) Data loss: Unable to open table file /home/jovyan/work/src/output/models: Failed precondition: /home/jovyan/work/src/output/models; Is a directory: perhaps your file is in a different file format and you need to use a different restore operator? +2022-06-27T06:36:16.860804505Z [[{{node save/RestoreV2}}]] +2022-06-27T06:36:16.860809493Z (1) Data loss: Unable to open table file /home/jovyan/work/src/output/models: Failed precondition: /home/jovyan/work/src/output/models; Is a directory: perhaps your file is in a different file format and you need to use a different restore operator? +2022-06-27T06:36:16.860815269Z [[{{node save/RestoreV2}}]] +2022-06-27T06:36:16.860819647Z [[save/RestoreV2/_17]] +2022-06-27T06:36:16.860824283Z 0 successful operations. +2022-06-27T06:36:16.860829198Z 0 derived errors ignored. 
+2022-06-27T06:36:16.860833772Z +2022-06-27T06:36:16.860838066Z During handling of the above exception, another exception occurred: +2022-06-27T06:36:16.86084295Z +2022-06-27T06:36:16.860847246Z Traceback (most recent call last): +2022-06-27T06:36:16.860865911Z File "src/test.py", line 33, in +2022-06-27T06:36:16.867479466Z saver.restore(sess, model_PATH) +2022-06-27T06:36:16.867497042Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/training/saver.py", line 1299, in restore +2022-06-27T06:36:16.867501318Z {self.saver_def.filename_tensor_name: save_path}) +2022-06-27T06:36:16.867504328Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 958, in run +2022-06-27T06:36:16.867507425Z run_metadata_ptr) +2022-06-27T06:36:16.867510248Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1181, in _run +2022-06-27T06:36:16.867513326Z feed_dict_tensor, options, run_metadata) +2022-06-27T06:36:16.867516126Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1359, in _do_run +2022-06-27T06:36:16.867519148Z run_metadata) +2022-06-27T06:36:16.867521656Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1384, in _do_call +2022-06-27T06:36:16.867524457Z raise type(e)(node_def, op, message) +2022-06-27T06:36:16.867527053Z tensorflow.python.framework.errors_impl.DataLossError: 2 root error(s) found. +2022-06-27T06:36:16.867529858Z (0) Data loss: Unable to open table file /home/jovyan/work/src/output/models: Failed precondition: /home/jovyan/work/src/output/models; Is a directory: perhaps your file is in a different file format and you need to use a different restore operator? 
+2022-06-27T06:36:16.867532909Z [[node save/RestoreV2 (defined at src/test.py:30) ]] +2022-06-27T06:36:16.867535905Z (1) Data loss: Unable to open table file /home/jovyan/work/src/output/models: Failed precondition: /home/jovyan/work/src/output/models; Is a directory: perhaps your file is in a different file format and you need to use a different restore operator? +2022-06-27T06:36:16.86753885Z [[node save/RestoreV2 (defined at src/test.py:30) ]] +2022-06-27T06:36:16.867541735Z [[save/RestoreV2/_17]] +2022-06-27T06:36:16.86754431Z 0 successful operations. +2022-06-27T06:36:16.867547008Z 0 derived errors ignored. +2022-06-27T06:36:16.86754958Z +2022-06-27T06:36:16.8675522Z Original stack trace for 'save/RestoreV2': +2022-06-27T06:36:16.867554898Z File "src/test.py", line 30, in +2022-06-27T06:36:16.86755794Z saver = tf.train.Saver() +2022-06-27T06:36:16.867560564Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/training/saver.py", line 836, in __init__ +2022-06-27T06:36:16.867563425Z self.build() +2022-06-27T06:36:16.867565938Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/training/saver.py", line 848, in build +2022-06-27T06:36:16.867580705Z self._build(self._filename, build_save=True, build_restore=True) +2022-06-27T06:36:16.867583844Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/training/saver.py", line 886, in _build +2022-06-27T06:36:16.867586717Z build_restore=build_restore) +2022-06-27T06:36:16.86758929Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/training/saver.py", line 516, in _build_internal +2022-06-27T06:36:16.867592137Z restore_sequentially, reshape) +2022-06-27T06:36:16.867594694Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/training/saver.py", line 336, in _AddRestoreOps +2022-06-27T06:36:16.867597486Z restore_sequentially) +2022-06-27T06:36:16.867600011Z 
File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/training/saver.py", line 583, in bulk_restore +2022-06-27T06:36:16.867602824Z return io_ops.restore_v2(filename_tensor, names, slices, dtypes) +2022-06-27T06:36:16.867605422Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1524, in restore_v2 +2022-06-27T06:36:16.867608998Z name=name) +2022-06-27T06:36:16.867611624Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py", line 744, in _apply_op_helper +2022-06-27T06:36:16.867614426Z attrs=attr_protos, op_def=op_def) +2022-06-27T06:36:16.867616967Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 3485, in _create_op_internal +2022-06-27T06:36:16.8676198Z op_def=op_def) +2022-06-27T06:36:16.867622461Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1949, in __init__ +2022-06-27T06:36:16.867625257Z self._traceback = tf_stack.extract_stack() +2022-06-27T06:36:16.86762792Z +2022-06-27T06:36:17.563235506Z SYSTEM: Finishing... diff --git a/job_logs/job-gpu-62b95050f752e3e25d1d3e32.log b/job_logs/job-gpu-62b95050f752e3e25d1d3e32.log new file mode 100644 index 0000000..de85ff8 --- /dev/null +++ b/job_logs/job-gpu-62b95050f752e3e25d1d3e32.log @@ -0,0 +1,67 @@ +2022-06-27T06:38:15.394534065Z SYSTEM: Preparing env... +2022-06-27T06:38:15.936080766Z SYSTEM: Running... 
+2022-06-27T06:38:16.723513366Z 2022-06-27 14:38:16.723151: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:38:18.316735193Z WARNING:tensorflow:From /home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. +2022-06-27T06:38:18.316789541Z Instructions for updating: +2022-06-27T06:38:18.316798297Z non-resource variables are not supported in the long term +2022-06-27T06:38:18.323267476Z Imported model (for Places365, 128x128 images) +2022-06-27T06:38:21.064529748Z WARNING:tensorflow:From /home/jovyan/work/src/model.py:20: conv2d (from tensorflow.python.keras.legacy_tf_layers.convolutional) is deprecated and will be removed in a future version. +2022-06-27T06:38:21.064570688Z Instructions for updating: +2022-06-27T06:38:21.064600199Z Use `tf.keras.layers.Conv2D` instead. +2022-06-27T06:38:21.076688641Z WARNING:tensorflow:From /home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/keras/legacy_tf_layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer_v1) is deprecated and will be removed in a future version. +2022-06-27T06:38:21.076716602Z Instructions for updating: +2022-06-27T06:38:21.076724171Z Please use `layer.__call__` method instead. +2022-06-27T06:38:21.221472244Z WARNING:tensorflow:From /home/jovyan/work/src/model.py:79: conv2d_transpose (from tensorflow.python.keras.legacy_tf_layers.convolutional) is deprecated and will be removed in a future version. +2022-06-27T06:38:21.221498205Z Instructions for updating: +2022-06-27T06:38:21.221506439Z Use `tf.keras.layers.Conv2DTranspose` instead. 
+2022-06-27T06:38:21.312483203Z 2022-06-27 14:38:21.311776: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1 +2022-06-27T06:38:21.356502814Z 2022-06-27 14:38:21.355592: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:38:21.361476016Z 2022-06-27 14:38:21.356582: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: +2022-06-27T06:38:21.36149573Z pciBusID: 0000:00:08.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0 +2022-06-27T06:38:21.36151434Z coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 7.75GiB deviceMemoryBandwidth: 681.88GiB/s +2022-06-27T06:38:21.361522652Z 2022-06-27 14:38:21.356626: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:38:21.36152877Z 2022-06-27 14:38:21.359432: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T06:38:21.373639957Z 2022-06-27 14:38:21.362305: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10 +2022-06-27T06:38:21.373676623Z 2022-06-27 14:38:21.362747: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10 +2022-06-27T06:38:21.373685101Z 2022-06-27 14:38:21.365718: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10 +2022-06-27T06:38:21.373690705Z 2022-06-27 14:38:21.367188: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10 +2022-06-27T06:38:21.387364092Z 2022-06-27 14:38:21.374139: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] 
Successfully opened dynamic library libcudnn.so.7 +2022-06-27T06:38:21.387383972Z 2022-06-27 14:38:21.374593: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:38:21.387390216Z 2022-06-27 14:38:21.376159: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:38:21.387406699Z 2022-06-27 14:38:21.377495: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0 +2022-06-27T06:38:21.387412376Z 2022-06-27 14:38:21.378494: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA +2022-06-27T06:38:21.387418019Z To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2022-06-27T06:38:21.431497702Z 2022-06-27 14:38:21.431103: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 2499990000 Hz +2022-06-27T06:38:21.432398121Z 2022-06-27 14:38:21.432015: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5e57cd0 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices: +2022-06-27T06:38:21.432415269Z 2022-06-27 14:38:21.432102: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2022-06-27T06:38:21.657683854Z 2022-06-27 14:38:21.654246: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:38:21.657726491Z 2022-06-27 14:38:21.655356: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4efe4c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: +2022-06-27T06:38:21.657733833Z 2022-06-27 14:38:21.655398: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0 +2022-06-27T06:38:21.657739026Z 2022-06-27 14:38:21.655783: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:38:21.657757214Z 2022-06-27 14:38:21.656944: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: +2022-06-27T06:38:21.657780321Z pciBusID: 0000:00:08.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0 +2022-06-27T06:38:21.6577862Z coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 7.75GiB deviceMemoryBandwidth: 681.88GiB/s +2022-06-27T06:38:21.657790652Z 2022-06-27 14:38:21.656994: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:38:21.65779509Z 2022-06-27 14:38:21.657030: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T06:38:21.65779984Z 2022-06-27 14:38:21.657057: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10 
+2022-06-27T06:38:21.657804356Z 2022-06-27 14:38:21.657082: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10 +2022-06-27T06:38:21.657808837Z 2022-06-27 14:38:21.657104: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10 +2022-06-27T06:38:21.657813355Z 2022-06-27 14:38:21.657126: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10 +2022-06-27T06:38:21.65781946Z 2022-06-27 14:38:21.657149: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7 +2022-06-27T06:38:21.657823716Z 2022-06-27 14:38:21.657381: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:38:21.660267085Z 2022-06-27 14:38:21.660041: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:38:21.668181623Z 2022-06-27 14:38:21.661956: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0 +2022-06-27T06:38:21.668216811Z 2022-06-27 14:38:21.662074: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:38:22.353724159Z 2022-06-27 14:38:22.351212: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix: +2022-06-27T06:38:22.35376374Z 2022-06-27 14:38:22.351279: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263] 0 +2022-06-27T06:38:22.353768071Z 2022-06-27 14:38:22.351336: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0: N +2022-06-27T06:38:22.364752412Z 2022-06-27 
14:38:22.361278: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:38:22.364805738Z 2022-06-27 14:38:22.362655: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:38:22.364814235Z 2022-06-27 14:38:22.363649: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 7116 MB memory) -> physical GPU (device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:08.0, compute capability: 6.0) +2022-06-27T06:38:22.375506339Z Traceback (most recent call last): +2022-06-27T06:38:22.375556853Z File "src/test.py", line 33, in +2022-06-27T06:38:22.386370827Z saver.restore(sess, model_PATH) +2022-06-27T06:38:22.386415106Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/training/saver.py", line 1291, in restore +2022-06-27T06:38:22.386423007Z checkpoint_prefix) +2022-06-27T06:38:22.386428006Z ValueError: The passed save_path is not a valid checkpoint: /home/jovyan/work/src/output/models/ +2022-06-27T06:38:23.059942491Z SYSTEM: Finishing... diff --git a/job_logs/job-gpu-62b95091a393bd89f5bbaa0e.log b/job_logs/job-gpu-62b95091a393bd89f5bbaa0e.log new file mode 100644 index 0000000..505cdb4 --- /dev/null +++ b/job_logs/job-gpu-62b95091a393bd89f5bbaa0e.log @@ -0,0 +1,71 @@ +2022-06-27T06:39:32.227411156Z SYSTEM: Preparing env... +2022-06-27T06:39:32.755526021Z SYSTEM: Running... 
+2022-06-27T06:39:33.491524252Z 2022-06-27 14:39:33.491184: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:39:35.12676689Z WARNING:tensorflow:From /home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. +2022-06-27T06:39:35.126797199Z Instructions for updating: +2022-06-27T06:39:35.12680371Z non-resource variables are not supported in the long term +2022-06-27T06:39:35.131545901Z Imported model (for Places365, 128x128 images) +2022-06-27T06:39:37.832567591Z WARNING:tensorflow:From /home/jovyan/work/src/model.py:20: conv2d (from tensorflow.python.keras.legacy_tf_layers.convolutional) is deprecated and will be removed in a future version. +2022-06-27T06:39:37.832620614Z Instructions for updating: +2022-06-27T06:39:37.832628947Z Use `tf.keras.layers.Conv2D` instead. +2022-06-27T06:39:37.838525717Z WARNING:tensorflow:From /home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/keras/legacy_tf_layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer_v1) is deprecated and will be removed in a future version. +2022-06-27T06:39:37.838587273Z Instructions for updating: +2022-06-27T06:39:37.838594925Z Please use `layer.__call__` method instead. +2022-06-27T06:39:37.969525841Z WARNING:tensorflow:From /home/jovyan/work/src/model.py:79: conv2d_transpose (from tensorflow.python.keras.legacy_tf_layers.convolutional) is deprecated and will be removed in a future version. +2022-06-27T06:39:37.969573824Z Instructions for updating: +2022-06-27T06:39:37.969580998Z Use `tf.keras.layers.Conv2DTranspose` instead. 
+2022-06-27T06:39:38.049508225Z 2022-06-27 14:39:38.048495: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1 +2022-06-27T06:39:38.143562415Z 2022-06-27 14:39:38.133245: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:39:38.143609396Z 2022-06-27 14:39:38.134162: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: +2022-06-27T06:39:38.143614886Z pciBusID: 0000:00:08.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0 +2022-06-27T06:39:38.143619664Z coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 7.75GiB deviceMemoryBandwidth: 681.88GiB/s +2022-06-27T06:39:38.143624592Z 2022-06-27 14:39:38.134214: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:39:38.143629424Z 2022-06-27 14:39:38.136831: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T06:39:38.14363438Z 2022-06-27 14:39:38.139753: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10 +2022-06-27T06:39:38.143656537Z 2022-06-27 14:39:38.140190: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10 +2022-06-27T06:39:38.15524677Z 2022-06-27 14:39:38.143579: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10 +2022-06-27T06:39:38.15526647Z 2022-06-27 14:39:38.145303: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10 +2022-06-27T06:39:38.155272765Z 2022-06-27 14:39:38.151591: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] 
Successfully opened dynamic library libcudnn.so.7 +2022-06-27T06:39:38.155278041Z 2022-06-27 14:39:38.151929: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:39:38.155313673Z 2022-06-27 14:39:38.153038: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:39:38.155320285Z 2022-06-27 14:39:38.154006: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0 +2022-06-27T06:39:38.155324938Z 2022-06-27 14:39:38.154836: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA +2022-06-27T06:39:38.15533058Z To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2022-06-27T06:39:38.169655627Z 2022-06-27 14:39:38.169268: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 2499990000 Hz +2022-06-27T06:39:38.170861573Z 2022-06-27 14:39:38.170220: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5c680c0 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices: +2022-06-27T06:39:38.170882428Z 2022-06-27 14:39:38.170264: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2022-06-27T06:39:38.351589663Z 2022-06-27 14:39:38.351012: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:39:38.374522059Z 2022-06-27 14:39:38.352464: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x2b14430 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: +2022-06-27T06:39:38.37457054Z 2022-06-27 14:39:38.352532: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0 +2022-06-27T06:39:38.37457901Z 2022-06-27 14:39:38.353104: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:39:38.374585377Z 2022-06-27 14:39:38.353982: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: +2022-06-27T06:39:38.374607731Z pciBusID: 0000:00:08.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0 +2022-06-27T06:39:38.374614597Z coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 7.75GiB deviceMemoryBandwidth: 681.88GiB/s +2022-06-27T06:39:38.374619062Z 2022-06-27 14:39:38.354019: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:39:38.37462404Z 2022-06-27 14:39:38.354052: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T06:39:38.374628981Z 2022-06-27 14:39:38.354080: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10 
+2022-06-27T06:39:38.374634219Z 2022-06-27 14:39:38.354105: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10 +2022-06-27T06:39:38.374639131Z 2022-06-27 14:39:38.354129: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10 +2022-06-27T06:39:38.374644441Z 2022-06-27 14:39:38.354152: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10 +2022-06-27T06:39:38.374649481Z 2022-06-27 14:39:38.354189: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7 +2022-06-27T06:39:38.374657131Z 2022-06-27 14:39:38.354423: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:39:38.374663194Z 2022-06-27 14:39:38.355564: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:39:38.374668509Z 2022-06-27 14:39:38.356746: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0 +2022-06-27T06:39:38.374673859Z 2022-06-27 14:39:38.356826: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:39:39.03151241Z 2022-06-27 14:39:39.030685: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix: +2022-06-27T06:39:39.031555785Z 2022-06-27 14:39:39.030753: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263] 0 +2022-06-27T06:39:39.031562668Z 2022-06-27 14:39:39.030766: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0: N +2022-06-27T06:39:39.041913611Z 2022-06-27 
14:39:39.039050: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:39:39.041959612Z 2022-06-27 14:39:39.040117: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:39:39.041968069Z 2022-06-27 14:39:39.040997: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 7116 MB memory) -> physical GPU (device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:08.0, compute capability: 6.0) +2022-06-27T06:39:39.350472788Z 2022-06-27 14:39:39.345683: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7 +2022-06-27T06:39:42.628125954Z 2022-06-27 14:39:42.623595: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T06:39:44.061667112Z Traceback (most recent call last): +2022-06-27T06:39:44.061719351Z File "src/test.py", line 35, in +2022-06-27T06:39:44.062379741Z util.save_image(output[0], out_PATH) +2022-06-27T06:39:44.062396766Z File "/home/jovyan/work/src/util.py", line 89, in save_image +2022-06-27T06:39:44.062672574Z img.save(name, format='PNG') +2022-06-27T06:39:44.062683885Z File "/home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/PIL/Image.py", line 2161, in save +2022-06-27T06:39:44.071457371Z fp = builtins.open(filename, "w+b") +2022-06-27T06:39:44.071493873Z IsADirectoryError: [Errno 21] Is a directory: '/home/jovyan/work/results/' +2022-06-27T06:39:45.129121047Z SYSTEM: Finishing... 
diff --git a/job_logs/job-gpu-62b950d6f9c7fbd55d4e9e0f.log b/job_logs/job-gpu-62b950d6f9c7fbd55d4e9e0f.log new file mode 100644 index 0000000..701ba9c --- /dev/null +++ b/job_logs/job-gpu-62b950d6f9c7fbd55d4e9e0f.log @@ -0,0 +1,63 @@ +2022-06-27T06:40:28.664029384Z SYSTEM: Preparing env... +2022-06-27T06:40:29.161948578Z SYSTEM: Running... +2022-06-27T06:40:29.90655155Z 2022-06-27 14:40:29.905643: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:40:31.552840979Z WARNING:tensorflow:From /home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. +2022-06-27T06:40:31.552884694Z Instructions for updating: +2022-06-27T06:40:31.552891704Z non-resource variables are not supported in the long term +2022-06-27T06:40:31.557248833Z Imported model (for Places365, 128x128 images) +2022-06-27T06:40:34.255714665Z WARNING:tensorflow:From /home/jovyan/work/src/model.py:20: conv2d (from tensorflow.python.keras.legacy_tf_layers.convolutional) is deprecated and will be removed in a future version. +2022-06-27T06:40:34.255762327Z Instructions for updating: +2022-06-27T06:40:34.255770327Z Use `tf.keras.layers.Conv2D` instead. +2022-06-27T06:40:34.255776823Z WARNING:tensorflow:From /home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/keras/legacy_tf_layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer_v1) is deprecated and will be removed in a future version. +2022-06-27T06:40:34.255788806Z Instructions for updating: +2022-06-27T06:40:34.255793807Z Please use `layer.__call__` method instead. 
+2022-06-27T06:40:34.385566834Z WARNING:tensorflow:From /home/jovyan/work/src/model.py:79: conv2d_transpose (from tensorflow.python.keras.legacy_tf_layers.convolutional) is deprecated and will be removed in a future version. +2022-06-27T06:40:34.385615911Z Instructions for updating: +2022-06-27T06:40:34.385623287Z Use `tf.keras.layers.Conv2DTranspose` instead. +2022-06-27T06:40:34.46652205Z 2022-06-27 14:40:34.463569: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1 +2022-06-27T06:40:34.587041885Z 2022-06-27 14:40:34.558264: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:40:34.587091586Z 2022-06-27 14:40:34.559496: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: +2022-06-27T06:40:34.587099226Z pciBusID: 0000:00:08.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0 +2022-06-27T06:40:34.587104431Z coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 7.75GiB deviceMemoryBandwidth: 681.88GiB/s +2022-06-27T06:40:34.587110049Z 2022-06-27 14:40:34.559547: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:40:34.587115546Z 2022-06-27 14:40:34.563201: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T06:40:34.587121023Z 2022-06-27 14:40:34.566815: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10 +2022-06-27T06:40:34.587142859Z 2022-06-27 14:40:34.567434: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10 +2022-06-27T06:40:34.587149143Z 2022-06-27 14:40:34.571508: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] 
Successfully opened dynamic library libcusolver.so.10 +2022-06-27T06:40:34.587154254Z 2022-06-27 14:40:34.573557: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10 +2022-06-27T06:40:34.587159273Z 2022-06-27 14:40:34.581702: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7 +2022-06-27T06:40:34.58717697Z 2022-06-27 14:40:34.582161: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:40:34.587184192Z 2022-06-27 14:40:34.583629: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:40:34.587191825Z 2022-06-27 14:40:34.584731: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0 +2022-06-27T06:40:34.587196874Z 2022-06-27 14:40:34.585721: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA +2022-06-27T06:40:34.587202666Z To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2022-06-27T06:40:34.605273814Z 2022-06-27 14:40:34.604881: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 2499990000 Hz +2022-06-27T06:40:34.612440387Z 2022-06-27 14:40:34.611455: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x43ef4a0 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices: +2022-06-27T06:40:34.612469486Z 2022-06-27 14:40:34.611536: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2022-06-27T06:40:34.818065383Z 2022-06-27 14:40:34.816854: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:40:34.818226758Z 2022-06-27 14:40:34.817945: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x1133430 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: +2022-06-27T06:40:34.818242687Z 2022-06-27 14:40:34.818059: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0 +2022-06-27T06:40:34.819098669Z 2022-06-27 14:40:34.818920: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:40:34.826000267Z 2022-06-27 14:40:34.820425: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: +2022-06-27T06:40:34.826040277Z pciBusID: 0000:00:08.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0 +2022-06-27T06:40:34.826047919Z coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 7.75GiB deviceMemoryBandwidth: 681.88GiB/s +2022-06-27T06:40:34.82605344Z 2022-06-27 14:40:34.820483: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:40:34.82605896Z 2022-06-27 14:40:34.820538: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T06:40:34.826064758Z 2022-06-27 14:40:34.820589: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10 
+2022-06-27T06:40:34.826069913Z 2022-06-27 14:40:34.820637: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10 +2022-06-27T06:40:34.826075151Z 2022-06-27 14:40:34.820683: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10 +2022-06-27T06:40:34.826080279Z 2022-06-27 14:40:34.820757: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10 +2022-06-27T06:40:34.826085367Z 2022-06-27 14:40:34.820807: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7 +2022-06-27T06:40:34.826090711Z 2022-06-27 14:40:34.821073: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:40:34.8260964Z 2022-06-27 14:40:34.822732: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:40:34.826101603Z 2022-06-27 14:40:34.824064: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0 +2022-06-27T06:40:34.826106851Z 2022-06-27 14:40:34.824140: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T06:40:35.48051732Z 2022-06-27 14:40:35.478735: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix: +2022-06-27T06:40:35.480572834Z 2022-06-27 14:40:35.478800: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263] 0 +2022-06-27T06:40:35.480580695Z 2022-06-27 14:40:35.478813: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0: N +2022-06-27T06:40:35.491016804Z 2022-06-27 
14:40:35.488592: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:40:35.492925518Z 2022-06-27 14:40:35.491127: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T06:40:35.492963862Z 2022-06-27 14:40:35.492080: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 7116 MB memory) -> physical GPU (device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:08.0, compute capability: 6.0) +2022-06-27T06:40:35.830526984Z 2022-06-27 14:40:35.825663: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7 +2022-06-27T06:40:37.078582913Z 2022-06-27 14:40:37.071379: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T06:40:39.337113298Z SYSTEM: Finishing... diff --git a/job_logs/job-gpu-62b96ec3f901e7972521f6bc.log b/job_logs/job-gpu-62b96ec3f901e7972521f6bc.log new file mode 100644 index 0000000..d102a1b --- /dev/null +++ b/job_logs/job-gpu-62b96ec3f901e7972521f6bc.log @@ -0,0 +1,63 @@ +2022-06-27T08:48:09.24239905Z SYSTEM: Preparing env... +2022-06-27T08:48:09.786941439Z SYSTEM: Running... +2022-06-27T08:48:10.51858763Z 2022-06-27 16:48:10.517635: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T08:48:12.178056383Z WARNING:tensorflow:From /home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. 
+2022-06-27T08:48:12.178109572Z Instructions for updating: +2022-06-27T08:48:12.178117458Z non-resource variables are not supported in the long term +2022-06-27T08:48:12.183152171Z Imported model (for Places365, 128x128 images) +2022-06-27T08:48:14.874564815Z WARNING:tensorflow:From /home/jovyan/work/src/model.py:20: conv2d (from tensorflow.python.keras.legacy_tf_layers.convolutional) is deprecated and will be removed in a future version. +2022-06-27T08:48:14.87460536Z Instructions for updating: +2022-06-27T08:48:14.874612991Z Use `tf.keras.layers.Conv2D` instead. +2022-06-27T08:48:14.879516836Z WARNING:tensorflow:From /home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/keras/legacy_tf_layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer_v1) is deprecated and will be removed in a future version. +2022-06-27T08:48:14.879552898Z Instructions for updating: +2022-06-27T08:48:14.879556966Z Please use `layer.__call__` method instead. +2022-06-27T08:48:14.998783159Z WARNING:tensorflow:From /home/jovyan/work/src/model.py:79: conv2d_transpose (from tensorflow.python.keras.legacy_tf_layers.convolutional) is deprecated and will be removed in a future version. +2022-06-27T08:48:14.99882407Z Instructions for updating: +2022-06-27T08:48:14.998827896Z Use `tf.keras.layers.Conv2DTranspose` instead. 
+2022-06-27T08:48:15.082132374Z 2022-06-27 16:48:15.075591: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1 +2022-06-27T08:48:15.167539823Z 2022-06-27 16:48:15.154770: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:48:15.167590578Z 2022-06-27 16:48:15.156152: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: +2022-06-27T08:48:15.16759558Z pciBusID: 0000:00:08.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0 +2022-06-27T08:48:15.167598782Z coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 7.75GiB deviceMemoryBandwidth: 681.88GiB/s +2022-06-27T08:48:15.167602013Z 2022-06-27 16:48:15.156213: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T08:48:15.167605072Z 2022-06-27 16:48:15.160218: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T08:48:15.167608115Z 2022-06-27 16:48:15.163802: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10 +2022-06-27T08:48:15.167628968Z 2022-06-27 16:48:15.164231: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10 +2022-06-27T08:48:15.172464872Z 2022-06-27 16:48:15.167436: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10 +2022-06-27T08:48:15.17248651Z 2022-06-27 16:48:15.168987: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10 +2022-06-27T08:48:15.179400732Z 2022-06-27 16:48:15.175372: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] 
Successfully opened dynamic library libcudnn.so.7 +2022-06-27T08:48:15.179421972Z 2022-06-27 16:48:15.175712: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:48:15.179428301Z 2022-06-27 16:48:15.176844: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:48:15.17943329Z 2022-06-27 16:48:15.177689: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0 +2022-06-27T08:48:15.179438827Z 2022-06-27 16:48:15.178507: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA +2022-06-27T08:48:15.179443601Z To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2022-06-27T08:48:15.189791017Z 2022-06-27 16:48:15.189490: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 2499990000 Hz +2022-06-27T08:48:15.190826889Z 2022-06-27 16:48:15.190353: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5d4d3b0 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices: +2022-06-27T08:48:15.190840436Z 2022-06-27 16:48:15.190405: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2022-06-27T08:48:15.354103229Z 2022-06-27 16:48:15.352890: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:48:15.360495813Z 2022-06-27 16:48:15.353962: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x2a91430 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: +2022-06-27T08:48:15.36051428Z 2022-06-27 16:48:15.354039: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0 +2022-06-27T08:48:15.360518449Z 2022-06-27 16:48:15.354703: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:48:15.360521962Z 2022-06-27 16:48:15.355674: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: +2022-06-27T08:48:15.360540021Z pciBusID: 0000:00:08.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0 +2022-06-27T08:48:15.360543183Z coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 7.75GiB deviceMemoryBandwidth: 681.88GiB/s +2022-06-27T08:48:15.360546053Z 2022-06-27 16:48:15.355713: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T08:48:15.360548859Z 2022-06-27 16:48:15.355757: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T08:48:15.360551628Z 2022-06-27 16:48:15.355789: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10 
+2022-06-27T08:48:15.360554374Z 2022-06-27 16:48:15.355815: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10 +2022-06-27T08:48:15.360557367Z 2022-06-27 16:48:15.355840: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10 +2022-06-27T08:48:15.360560106Z 2022-06-27 16:48:15.355863: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10 +2022-06-27T08:48:15.360562889Z 2022-06-27 16:48:15.355889: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7 +2022-06-27T08:48:15.360566266Z 2022-06-27 16:48:15.356094: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:48:15.360569337Z 2022-06-27 16:48:15.357167: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:48:15.360572245Z 2022-06-27 16:48:15.358197: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0 +2022-06-27T08:48:15.360574967Z 2022-06-27 16:48:15.358256: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T08:48:16.025531234Z 2022-06-27 16:48:16.024309: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix: +2022-06-27T08:48:16.02556822Z 2022-06-27 16:48:16.024381: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263] 0 +2022-06-27T08:48:16.02557269Z 2022-06-27 16:48:16.024394: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0: N +2022-06-27T08:48:16.036018785Z 2022-06-27 
16:48:16.033186: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:48:16.036045308Z 2022-06-27 16:48:16.034241: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:48:16.036052087Z 2022-06-27 16:48:16.035136: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 7116 MB memory) -> physical GPU (device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:08.0, compute capability: 6.0) +2022-06-27T08:48:16.324801304Z 2022-06-27 16:48:16.323386: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7 +2022-06-27T08:48:17.556743331Z 2022-06-27 16:48:17.551710: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T08:48:19.783798978Z SYSTEM: Finishing... diff --git a/job_logs/job-gpu-62b9701b8029151df74612ce.log b/job_logs/job-gpu-62b9701b8029151df74612ce.log new file mode 100644 index 0000000..8c40844 --- /dev/null +++ b/job_logs/job-gpu-62b9701b8029151df74612ce.log @@ -0,0 +1,15 @@ +2022-06-27T08:53:53.960313457Z SYSTEM: Preparing env... +2022-06-27T08:53:54.530950342Z SYSTEM: Running... +2022-06-27T08:53:55.275717821Z 2022-06-27 16:53:55.274909: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T08:53:56.910131992Z WARNING:tensorflow:From /home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. 
+2022-06-27T08:53:56.910179395Z Instructions for updating: +2022-06-27T08:53:56.91018644Z non-resource variables are not supported in the long term +2022-06-27T08:53:56.915074142Z Imported model (for Places365, 128x128 images) +2022-06-27T08:53:59.436145133Z Traceback (most recent call last): +2022-06-27T08:53:59.436188131Z File "src/test.py", line 26, in +2022-06-27T08:53:59.436409259Z img_p = util.preprocess_images_outpainting(img) +2022-06-27T08:53:59.43642622Z File "/home/jovyan/work/src/util.py", line 58, in preprocess_images_outpainting +2022-06-27T08:53:59.436715361Z imgs_p = np.concatenate((imgs, mask), axis=3) +2022-06-27T08:53:59.436729791Z File "<__array_function__ internals>", line 6, in concatenate +2022-06-27T08:53:59.443505289Z ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 250 and the array at index 1 has size 128 +2022-06-27T08:53:59.902211373Z SYSTEM: Finishing... diff --git a/job_logs/job-gpu-62b971036452cd65a61ca625.log b/job_logs/job-gpu-62b971036452cd65a61ca625.log new file mode 100644 index 0000000..4504859 --- /dev/null +++ b/job_logs/job-gpu-62b971036452cd65a61ca625.log @@ -0,0 +1,63 @@ +2022-06-27T08:57:46.196243927Z SYSTEM: Preparing env... +2022-06-27T08:57:46.729899376Z SYSTEM: Running... +2022-06-27T08:57:47.457595515Z 2022-06-27 16:57:47.456138: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T08:57:49.146821587Z WARNING:tensorflow:From /home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. 
+2022-06-27T08:57:49.146871573Z Instructions for updating: +2022-06-27T08:57:49.146880336Z non-resource variables are not supported in the long term +2022-06-27T08:57:49.151937708Z Imported model (for Places365, 128x128 images) +2022-06-27T08:57:51.925523771Z WARNING:tensorflow:From /home/jovyan/work/src/model.py:20: conv2d (from tensorflow.python.keras.legacy_tf_layers.convolutional) is deprecated and will be removed in a future version. +2022-06-27T08:57:51.925574935Z Instructions for updating: +2022-06-27T08:57:51.925582982Z Use `tf.keras.layers.Conv2D` instead. +2022-06-27T08:57:51.938638561Z WARNING:tensorflow:From /home/jovyan/.virtualenvs/basenv/lib/python3.7/site-packages/tensorflow/python/keras/legacy_tf_layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer_v1) is deprecated and will be removed in a future version. +2022-06-27T08:57:51.938691742Z Instructions for updating: +2022-06-27T08:57:51.938700174Z Please use `layer.__call__` method instead. +2022-06-27T08:57:52.053507181Z WARNING:tensorflow:From /home/jovyan/work/src/model.py:79: conv2d_transpose (from tensorflow.python.keras.legacy_tf_layers.convolutional) is deprecated and will be removed in a future version. +2022-06-27T08:57:52.053542901Z Instructions for updating: +2022-06-27T08:57:52.053549331Z Use `tf.keras.layers.Conv2DTranspose` instead. 
+2022-06-27T08:57:52.137499385Z 2022-06-27 16:57:52.133265: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1 +2022-06-27T08:57:52.241583163Z 2022-06-27 16:57:52.233707: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:57:52.241641201Z 2022-06-27 16:57:52.234628: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: +2022-06-27T08:57:52.241649804Z pciBusID: 0000:00:08.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0 +2022-06-27T08:57:52.241654633Z coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 7.75GiB deviceMemoryBandwidth: 681.88GiB/s +2022-06-27T08:57:52.241659849Z 2022-06-27 16:57:52.234668: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T08:57:52.241664684Z 2022-06-27 16:57:52.237458: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T08:57:52.241669496Z 2022-06-27 16:57:52.240254: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10 +2022-06-27T08:57:52.241690618Z 2022-06-27 16:57:52.240676: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10 +2022-06-27T08:57:52.250526608Z 2022-06-27 16:57:52.243841: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10 +2022-06-27T08:57:52.250549004Z 2022-06-27 16:57:52.245384: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10 +2022-06-27T08:57:52.255036662Z 2022-06-27 16:57:52.251546: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] 
Successfully opened dynamic library libcudnn.so.7 +2022-06-27T08:57:52.255059262Z 2022-06-27 16:57:52.251873: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:57:52.255067412Z 2022-06-27 16:57:52.252957: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:57:52.255073068Z 2022-06-27 16:57:52.253796: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0 +2022-06-27T08:57:52.25507835Z 2022-06-27 16:57:52.254586: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA +2022-06-27T08:57:52.255083681Z To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2022-06-27T08:57:52.271420544Z 2022-06-27 16:57:52.270498: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 2499990000 Hz +2022-06-27T08:57:52.271743118Z 2022-06-27 16:57:52.271455: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4747740 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices: +2022-06-27T08:57:52.271767039Z 2022-06-27 16:57:52.271509: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2022-06-27T08:57:52.444513161Z 2022-06-27 16:57:52.438561: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:57:52.444560328Z 2022-06-27 16:57:52.439606: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x44724a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: +2022-06-27T08:57:52.444568583Z 2022-06-27 16:57:52.439644: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0 +2022-06-27T08:57:52.444573851Z 2022-06-27 16:57:52.440010: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:57:52.444579411Z 2022-06-27 16:57:52.440946: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: +2022-06-27T08:57:52.444601594Z pciBusID: 0000:00:08.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0 +2022-06-27T08:57:52.444607895Z coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 7.75GiB deviceMemoryBandwidth: 681.88GiB/s +2022-06-27T08:57:52.44461265Z 2022-06-27 16:57:52.440984: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T08:57:52.444617514Z 2022-06-27 16:57:52.441015: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T08:57:52.444622224Z 2022-06-27 16:57:52.441041: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10 
+2022-06-27T08:57:52.444627069Z 2022-06-27 16:57:52.441064: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10 +2022-06-27T08:57:52.444631482Z 2022-06-27 16:57:52.441085: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10 +2022-06-27T08:57:52.44463649Z 2022-06-27 16:57:52.441106: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10 +2022-06-27T08:57:52.444642981Z 2022-06-27 16:57:52.441130: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7 +2022-06-27T08:57:52.444648137Z 2022-06-27 16:57:52.441319: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:57:52.444653345Z 2022-06-27 16:57:52.442307: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:57:52.444658464Z 2022-06-27 16:57:52.443119: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0 +2022-06-27T08:57:52.444662832Z 2022-06-27 16:57:52.443165: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1 +2022-06-27T08:57:53.113557514Z 2022-06-27 16:57:53.112857: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix: +2022-06-27T08:57:53.113618014Z 2022-06-27 16:57:53.112923: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263] 0 +2022-06-27T08:57:53.113626372Z 2022-06-27 16:57:53.112936: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0: N +2022-06-27T08:57:53.125264805Z 2022-06-27 
16:57:53.122189: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:57:53.125308939Z 2022-06-27 16:57:53.123241: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2022-06-27T08:57:53.125317622Z 2022-06-27 16:57:53.124239: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 7116 MB memory) -> physical GPU (device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:08.0, compute capability: 6.0) +2022-06-27T08:57:53.434540742Z 2022-06-27 16:57:53.425442: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7 +2022-06-27T08:57:54.72057696Z 2022-06-27 16:57:54.713817: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10 +2022-06-27T08:57:57.027310115Z SYSTEM: Finishing... 
diff --git a/poster/.ipynb_checkpoints/msabini-gili__image-outpainting-poster-checkpoint.pdf b/poster/.ipynb_checkpoints/msabini-gili__image-outpainting-poster-checkpoint.pdf new file mode 100755 index 0000000..0d1eb5e Binary files /dev/null and b/poster/.ipynb_checkpoints/msabini-gili__image-outpainting-poster-checkpoint.pdf differ diff --git a/project_requirements.txt b/project_requirements.txt new file mode 100644 index 0000000..0a645d9 --- /dev/null +++ b/project_requirements.txt @@ -0,0 +1,140 @@ +murmurhash==1.0.6 +jmespath==0.10.0 +configparser==5.2.0 +imbalanced-learn==0.6.2 +Augmentor==0.2.8 +scikit-image==0.15.0 +tensorboard-data-server==0.6.1 +requests-oauthlib==1.3.1 +argon2-cffi-bindings==21.2.0 +thinc==7.4.1 +charset-normalizer==2.0.12 +nltk==3.5 +transformers==4.1.1 +astunparse==1.6.3 +PyWavelets==1.2.0 +semantic-version==2.8.5 +sentencepiece==0.1.91 +PyAudio==0.2.11 +greenlet==1.1.2 +ruamel.yaml.clib==0.2.6 +pyasn1==0.4.8 +attrs==19.3.0 +retrying==1.3.3 +torchtext==0.6.0 +func-timeout==4.3.5 +gym==0.15.7 +importlib-metadata==4.11.1 +pyasn1-modules==0.2.8 +cmake==3.21.1 +word2vec==0.11.1 +packaging==21.3 +preshed==3.0.6 +regex==2022.1.18 +tensorflow-privacy==0.5.2 +rsa==4.8 +easydict==1.9 +spacy==2.3.2 +tensorboardX==2.0 +defusedxml==0.7.1 +s3transfer==0.3.3 +networkx==2.6.3 +catalogue==1.0.0 +openpyxl==2.6.4 +dm-tree==0.1.6 +imageio==2.8.0 +metakernel==0.28.2 +opt-einsum==3.3.0 +cachetools==3.1.1 +smart-open==5.2.1 +multipledispatch==0.6.0 +argon2-cffi==21.3.0 +tinycss2==1.1.1 +graphviz==0.14 +minio==5.0.10 +boto3==1.16.25 +unification==0.2.2 +oauthlib==3.2.0 +google-auth==2.6.0 +gensim==3.8.3 +tensorboard-plugin-wit==1.8.1 +pytorch-pretrained-bert==0.6.2 +plotly==4.8.1 +joblib==1.1.0 +pydot==1.4.1 +sacremoses==0.0.47 +calysto==1.0.6 +mpmath==1.2.1 +toolz==0.11.2 +en-core-web-sm==https://files.momodel.cn/en_core_web_sm-2.3.0.tar.gz +rouge==1.0.0 +plac==1.1.3 +importlib-resources==5.4.0 +et-xmlfile==1.1.0 +typing-extensions==4.1.1 +certipy==0.1.3 
+platformdirs==2.5.1 +debugpy==1.5.1 +kanren==0.2.3 +cryptography==36.0.1 +python-json-logger==2.0.2 +ruamel.yaml==0.17.21 +pyglet==1.5.0 +cymem==2.0.6 +Shapely==1.7.0 +click==8.0.4 +jdcal==1.4.1 +jupyter-telemetry==0.1.0 +tensorflow-federated==0.17.0 +tensorflow-estimator==2.3.0 +cloudpickle==1.2.2 +jupyterlab-server==0.2.0 +nest-asyncio==1.5.4 +mindspore==https://ms-release.obs.cn-north-4.myhuaweicloud.com/1.0.0/MindSpore/cpu/ubuntu_x86/mindspore-1.0.0-cp37-cp37m-linux_x86_64.whl +google-pasta==0.2.0 +async-generator==1.10 +yellowbrick==1.1 +tf-slim==1.1.0 +xlrd==1.2.0 +numpyencoder==0.3.0 +copulas==0.3.3 +torch==1.4.0+cpu +typeguard==2.13.3 +pyOpenSSL==22.0.0 +matplotlib-inline==0.1.3 +tqdm==4.46.1 +torchvision==0.5.0+cpu +wrapt==1.13.3 +google-auth-oauthlib==0.4.6 +pycparser==2.21 +filelock==3.6.0 +botocore==1.19.25 +XlsxWriter==1.4.3 +dlib==19.22.0 +portpicker==1.3.9 +pytorch-transformers==1.2.0 +srsly==1.0.5 +distlib==0.3.4 +Cython==0.29.20 +install==1.3.5 +cssselect2==0.4.1 +CairoSVG==2.5.2 +pygame==2.0.1 +ipdb==0.13.2 +zipp==3.7.0 +tensorflow-model-optimization==0.4.1 +wasabi==0.9.0 +svgwrite==1.4.1 +baytune==0.3.12 +jieba==0.42.1 +minepy==1.2.4 +cffi==1.15.0 +blis==0.4.1 +paddlepaddle==2.0.1 +cairocffi==1.3.0 +tensorflow-addons==0.11.2 +sympy==1.6.2 +pyrsistent==0.18.1 +imgaug==0.4.0 +asttokens==2.0.5 +tokenizers==0.9.4 diff --git a/results/.ipynb_checkpoints/README-checkpoint.md b/results/.ipynb_checkpoints/README-checkpoint.md new file mode 100644 index 0000000..81902f1 --- /dev/null +++ b/results/.ipynb_checkpoints/README-checkpoint.md @@ -0,0 +1,2 @@ +Please store your training checkpoints or results here +请在此处存储 checkpoints 和结果文件 \ No newline at end of file diff --git a/results/.ipynb_checkpoints/city_output-checkpoint.png b/results/.ipynb_checkpoints/city_output-checkpoint.png new file mode 100644 index 0000000..ef334ed Binary files /dev/null and b/results/.ipynb_checkpoints/city_output-checkpoint.png differ diff --git 
a/results/.ipynb_checkpoints/test_output-checkpoint.png b/results/.ipynb_checkpoints/test_output-checkpoint.png new file mode 100644 index 0000000..3aad43a Binary files /dev/null and b/results/.ipynb_checkpoints/test_output-checkpoint.png differ diff --git a/results/city_output.png b/results/city_output.png new file mode 100644 index 0000000..4c58e77 Binary files /dev/null and b/results/city_output.png differ diff --git a/results/test_output.png b/results/test_output.png new file mode 100644 index 0000000..3aad43a Binary files /dev/null and b/results/test_output.png differ diff --git a/src/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/src/.ipynb_checkpoints/Untitled-checkpoint.ipynb new file mode 100644 index 0000000..4f73a2b --- /dev/null +++ b/src/.ipynb_checkpoints/Untitled-checkpoint.ipynb @@ -0,0 +1,130 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "db0b0a11", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-06-27 17:55:39.372674: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory\n", + "2022-06-27 17:55:39.372709: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "non-resource variables are not supported in the long term\n", + "Imported model (for Places365, 128x128 images)\n" + ] + } + ], + "source": [ + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + 
"tf.reset_default_graph()\n", + "import numpy as np\n", + "from PIL import Image\n", + "import model\n", + "import util\n", + "import os\n", + "import sys" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "377355df", + "metadata": {}, + "outputs": [], + "source": [ + "model_PATH='/home/jovyan/work/src/output/models/model2000.ckpt'" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "fd4ec8ee", + "metadata": {}, + "outputs": [], + "source": [ + "def load_demo_image(in_PATH):\n", + " img = np.array(Image.open(in_PATH).convert('RGB'))[np.newaxis] / 255.0\n", + " img_p = util.preprocess_images_outpainting(img)\n", + " return img_p" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cc76d61f", + "metadata": {}, + "outputs": [], + "source": [ + "def inference(model_PATH, img_p):\n", + " G_Z = tf.placeholder(tf.float32, shape=[None, IMAGE_SZ, IMAGE_SZ, 4], name='G_Z')\n", + " G_sample = model.generator(G_Z)\n", + " \n", + " saver = tf.train.Saver()\n", + " with tf.Session() as sess:\n", + " saver.restore(sess, model_PATH)\n", + " output, = sess.run([G_sample], feed_dict={G_Z: img_p})\n", + " img_norm = (output[0] * 255.0).astype(np.uint8)\n", + " img = Image.fromarray(img_norm, 'RGB')\n", + " #util.save_image(output[0], out_PATH)\n", + " return img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d93edf59", + "metadata": {}, + "outputs": [], + "source": [ + "def handle(conf):\n", + " \"\"\"\n", + " 该方法是部署之后,其他人调用你的服务时候的处理方法。\n", + " 请按规范填写参数结构,这样我们就能替你自动生成配置文件,方便其他人的调用。\n", + " 范例:\n", + " params['key'] = value # value_type: str # description: some description\n", + " value_type 可以选择:img, video, audio, str, int, float, [int], [str], [float]\n", + " 参数请放到params字典中,我们会自动解析该变量。\n", + " \"\"\"\n", + " base64_str = conf['Photo']\n", + " image = load_demo_image(base64_str, image_size, device)\n", + " res = inference(model, image)\n", + " # add your code\n", + " return {'Output': res}\n", + " " + ] + } 
+ ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/.ipynb_checkpoints/model-checkpoint.py b/src/.ipynb_checkpoints/model-checkpoint.py new file mode 100644 index 0000000..77b1317 --- /dev/null +++ b/src/.ipynb_checkpoints/model-checkpoint.py @@ -0,0 +1,158 @@ +# proj: image-outpainting +# file: model.py +# authors: Mark Sabini, Gili Rusak +# desc: Model for outpainting on 128x128 images with only +# a global discriminator. +# ------------------------------------------------------------- +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() + +print('Imported model (for Places365, 128x128 images)') + +def generator(z): + with tf.variable_scope('G', reuse=tf.AUTO_REUSE): + conv1 = tf.layers.conv2d( + inputs=z, + filters=64, + kernel_size=[5, 5], + strides=(1, 1), + padding="same", + activation=tf.nn.relu) + + conv2 = tf.layers.conv2d( + inputs=conv1, + filters=128, + kernel_size=[3, 3], + strides=(2, 2), + padding="same", + activation=tf.nn.relu) + + conv3 = tf.layers.conv2d( + inputs=conv2, + filters=256, + kernel_size=[3, 3], + strides=(1, 1), + padding="same", + activation=tf.nn.relu) + + conv4 = tf.layers.conv2d( + inputs=conv3, + filters=256, + kernel_size=[3, 3], + strides=(1, 1), + dilation_rate=(2, 2), + padding="same", + activation=tf.nn.relu) + + conv5 = tf.layers.conv2d( + inputs=conv4, + filters=256, + kernel_size=[3, 3], + strides=(1, 1), + dilation_rate=(4, 4), + padding="same", + activation=tf.nn.relu) + + conv5_p = tf.layers.conv2d( + inputs=conv5, + filters=256, + kernel_size=[3, 3], + strides=(1, 1), + dilation_rate=(8, 8), + padding="same", + 
activation=tf.nn.relu) + + conv6 = tf.layers.conv2d( + inputs=conv5_p, + filters=256, + kernel_size=[3, 3], + strides=(1, 1), + padding="same", + activation=tf.nn.relu) + + deconv7 = tf.layers.conv2d_transpose( + inputs=conv6, + filters=128, + kernel_size=[4, 4], + strides=(2, 2), + padding="same", + activation=tf.nn.relu) + + conv8 = tf.layers.conv2d( + inputs=deconv7, + filters=64, + kernel_size=[3, 3], + strides=(1, 1), + padding="same", + activation=tf.nn.relu) + + out = tf.layers.conv2d( + inputs=conv8, + filters=3, + kernel_size=[3, 3], + strides=(1, 1), + padding="same", + activation=tf.sigmoid) + + return out + +def global_discriminator(x): + with tf.variable_scope('DG', reuse=tf.AUTO_REUSE): + conv1 = tf.layers.conv2d( + inputs=x, + filters=32, + kernel_size=[5, 5], + strides=(2, 2), + padding="same", + activation=tf.nn.relu) + + conv2 = tf.layers.conv2d( + inputs=conv1, + filters=64, + kernel_size=[5, 5], + strides=(2, 2), + padding="same", + activation=tf.nn.relu) + + conv3 = tf.layers.conv2d( + inputs=conv2, + filters=64, + kernel_size=[5, 5], + strides=(2, 2), + padding="same", + activation=tf.nn.relu) + + conv4 = tf.layers.conv2d( + inputs=conv3, + filters=64, + kernel_size=[5, 5], + strides=(2, 2), + padding="same", + activation=tf.nn.relu) + + conv5 = tf.layers.conv2d( + inputs=conv4, + filters=64, + kernel_size=[5, 5], + strides=(2, 2), + padding="same", + activation=tf.nn.relu) + + conv5_flat = tf.layers.flatten( + inputs=conv5) + + dense6 = tf.layers.dense( + inputs=conv5_flat, + units=512, + activation=tf.nn.relu) + + return dense6 + +def concatenator(global_x): + with tf.variable_scope('C', reuse=tf.AUTO_REUSE): + dense1 = tf.layers.dense( + inputs=global_x, + units=1, + activation=tf.sigmoid) + + return dense1 diff --git a/src/.ipynb_checkpoints/run-checkpoint.sh b/src/.ipynb_checkpoints/run-checkpoint.sh new file mode 100644 index 0000000..906dc92 --- /dev/null +++ b/src/.ipynb_checkpoints/run-checkpoint.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env 
bash + +# Runs train.py and saves the console output to output/out +stdbuf -i0 -o0 -e0 python -u train.py | tee output/out diff --git a/src/.ipynb_checkpoints/run_ld-checkpoint.sh b/src/.ipynb_checkpoints/run_ld-checkpoint.sh new file mode 100644 index 0000000..533429a --- /dev/null +++ b/src/.ipynb_checkpoints/run_ld-checkpoint.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +# Runs train_ld.py and saves the console output to output/out +stdbuf -i0 -o0 -e0 python -u train_ld.py | tee output/out diff --git a/src/.ipynb_checkpoints/test-checkpoint.py b/src/.ipynb_checkpoints/test-checkpoint.py new file mode 100644 index 0000000..7767aae --- /dev/null +++ b/src/.ipynb_checkpoints/test-checkpoint.py @@ -0,0 +1,36 @@ +# proj: image-outpainting +# file: test.py +# authors: Mark Sabini, Gili Rusak +# desc: Script for simulating the training pipeline. Masks out +# the sides of an image, feeds it through the network, and +# compares the network output to the original image. +# ------------------------------------------------------------- +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() +import numpy as np +from PIL import Image +import model +import util +import os +import sys + +model_PATH='/home/jovyan/work/src/output/models/model2000.ckpt' +in_PATH='/home/jovyan/work/images/test.png' +out_PATH='/home/jovyan/work/results/test_output.png' + +tf.reset_default_graph() + +IMAGE_SZ = 128 + +img = np.array(Image.open(in_PATH).convert('RGB'))[np.newaxis] / 255.0 +img_p = util.preprocess_images_outpainting(img) + +G_Z = tf.placeholder(tf.float32, shape=[None, IMAGE_SZ, IMAGE_SZ, 4], name='G_Z') +G_sample = model.generator(G_Z) + +saver = tf.train.Saver() + +with tf.Session() as sess: + saver.restore(sess, model_PATH) + output, = sess.run([G_sample], feed_dict={G_Z: img_p}) + util.save_image(output[0], out_PATH) diff --git a/src/.ipynb_checkpoints/train-checkpoint.py b/src/.ipynb_checkpoints/train-checkpoint.py new file mode 100644 index 0000000..c1ff616 --- /dev/null +++ 
b/src/.ipynb_checkpoints/train-checkpoint.py @@ -0,0 +1,202 @@ +# proj: image-outpainting +# file: train.py +# authors: Mark Sabini, Gili Rusak +# desc: Train the model specified in model.py, which only +# uses a global discriminator. +# ------------------------------------------------------------- +import tensorflow.compat.v1 as tf +import numpy as np +from PIL import Image +import model +import util +import os +import sys +from tensorflow.python.framework import ops +tf.disable_eager_execution() +ops.reset_default_graph() + +# Places365 Training Hyperparameters +BATCH_SZ = 16 +VERBOSE = False +EPSILON = 1e-9 +IMAGE_SZ = 128 +OUT_DIR = 'output' +MODEL_DIR = os.path.join(OUT_DIR, 'models') +INFO_PATH = os.path.join(OUT_DIR, 'run.txt') +N_TEST = 10 +N_ITERS = 227500 +N_ITERS_P1 = 40950 # How many iterations to train in phase 1 +N_ITERS_P2 = 4550 # How many iterations to train in phase 2 +INTV_PRINT = 200 # How often to print +INTV_SAVE = 1000 # How often to save the model +ALPHA = 0.0004 + +''' +# City Training Hyperparameters +BATCH_SZ = 1 +VERBOSE = False +EPSILON = 1e-9 +IMAGE_SZ = 128 +OUT_DIR = 'output' +MODEL_DIR = os.path.join(OUT_DIR, 'models') +INFO_PATH = os.path.join(OUT_DIR, 'run.txt') +N_TEST = 1 +N_ITERS = 5000 +N_ITERS_P1 = 1000 # How many iterations to train in phase 1 +N_ITERS_P2 = 400 # How many iterations to train in phase 2 +INTV_PRINT = 50 # How often to print +INTV_SAVE = 10000 # How often to save the model +ALPHA = 0.0004 +''' + +# Check that we don't clobber a pre-existing run +if len(sys.argv) < 2 and os.path.isdir(OUT_DIR) and len(os.listdir(OUT_DIR)) > 2: + print('Warning, OUT_DIR already exists. Aborting.') + exit() + +# Load in a model if specified as the second argument. 
+start_iter = 0 +model_filename = None +if len(sys.argv) >= 2: + start_iter = int(sys.argv[1]) + model_filename = os.path.join(MODEL_DIR, 'model%d.ckpt' % start_iter) + + + +# Generator code +G_Z = tf.placeholder(tf.float32, shape=[None, IMAGE_SZ, IMAGE_SZ, 4], name='G_Z') +DG_X = tf.placeholder(tf.float32, shape=[None, IMAGE_SZ, IMAGE_SZ, 3], name='DG_X') + +# Load Places365 data +data = np.load('places/places_128.npz') +imgs = data['imgs_train'] # Originally from http://data.csail.mit.edu/places/places365/val_256.tar +imgs_p = util.preprocess_images_outpainting(imgs) + +test_imgs = data['imgs_test'] +test_imgs_p = util.preprocess_images_outpainting(test_imgs) + +test_img = test_imgs[:N_TEST] +test_img_p = test_imgs_p[:N_TEST] + +train_img = imgs[4, np.newaxis] +train_img_p = imgs_p[4, np.newaxis] + +''' +# Load city image data +imgs = util.load_city_image() +imgs_p = util.preprocess_images_outpainting(imgs) + +test_imgs = util.load_city_image() +test_imgs_p = util.preprocess_images_outpainting(test_imgs) + +test_img = test_imgs +test_img_p = test_imgs_p + +train_img = imgs +train_img_p = imgs_p +''' + +# Write training and testing sample ground truths as reference +util.save_image(train_img[0], os.path.join(OUT_DIR, 'train_img.png')) +for i_test in range(N_TEST): + util.save_image(test_imgs[i_test], os.path.join(OUT_DIR, 'test_img_%d.png' % i_test)) + +G_sample = model.generator(G_Z) +vars_G = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='G') + +C_real = model.concatenator(model.global_discriminator(DG_X)) +C_fake = model.concatenator(model.global_discriminator(G_sample)) +vars_DG = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='DG') +vars_C = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='C') + +C_loss = -tf.reduce_mean(tf.log(tf.maximum(C_real, EPSILON)) + tf.log(tf.maximum(1. - C_fake, EPSILON))) +G_MSE_loss = tf.losses.mean_squared_error(G_sample, DG_X, weights=tf.expand_dims(G_Z[:,:,:,3], -1)) # TODO: MULTIPLY with mask. 
Actually see if we want to remove this. +G_loss = G_MSE_loss - ALPHA * tf.reduce_mean(tf.log(tf.maximum(C_fake, EPSILON))) + +C_solver = tf.train.AdamOptimizer().minimize(C_loss, var_list=(vars_DG + vars_C)) +G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=vars_G) +G_MSE_solver = tf.train.AdamOptimizer().minimize(G_MSE_loss, var_list=vars_G) + +train_MSE_loss = [] +dev_MSE_loss = [] + +last_output_PATH = [None] * N_TEST + +assert N_ITERS > N_ITERS_P1 + N_ITERS_P2 + +# Saver to save the session +saver = tf.train.Saver() + +with tf.Session() as sess: + if model_filename is None: + sess.run(tf.global_variables_initializer()) + else: + saver.restore(sess, model_filename) + for i in range(start_iter, N_ITERS + 1): + batch, batch_p = util.sample_random_minibatch(imgs, imgs_p, BATCH_SZ) + G_sample_ = None + C_loss_curr, G_loss_curr, G_MSE_loss_curr = None, None, None + if i < N_ITERS_P1: # Stage 1 - Train Generator Only + if i == 0: + print('------------------> Beginning Phase 1...') + _, G_MSE_loss_curr, G_sample_ = sess.run([G_MSE_solver, G_MSE_loss, G_sample], feed_dict={DG_X: batch, G_Z: batch_p}) + elif i < N_ITERS_P1 + N_ITERS_P2: # Stage 2 - Train Discriminator Only + if i == N_ITERS_P1: + print('------------------> Beginning Phase 2...') + _, C_loss_curr, C_real_, C_fake_ = sess.run([C_solver, C_loss, C_real, C_fake], feed_dict={DG_X: batch, G_Z: batch_p}) + if VERBOSE: + print((i, C_loss_curr, np.min(C_real_), np.max(C_real_), np.min(C_fake_), np.max(C_fake_))) + else: # Stage 3 - Train both Generator and Discriminator + if i == N_ITERS_P1 + N_ITERS_P2: + print('------------------> Beginning Phase 3...') + _, C_loss_curr, C_real_, C_fake_ = sess.run([C_solver, C_loss, C_real, C_fake], feed_dict={DG_X: batch, G_Z: batch_p}) + if VERBOSE: + print((i, C_loss_curr, 'D', np.min(C_real_), np.max(C_real_), np.min(C_fake_), np.max(C_fake_))) + _, G_loss_curr, G_MSE_loss_curr, G_sample_, C_fake_ = sess.run([G_solver, G_loss, G_MSE_loss, G_sample, C_fake], 
feed_dict={DG_X: batch, G_Z: batch_p}) + if VERBOSE: + print((i, G_loss_curr, 'G', np.min(C_fake_), np.max(C_fake_))) + + # Periodically test the generator on held-out images + if i % INTV_PRINT == 0: + G_MSE_loss_curr_dev = None + if G_sample_ is not None: + # Print out the dev image + output, G_MSE_loss_curr_dev = sess.run([G_sample, G_MSE_loss], feed_dict={DG_X: test_img, G_Z: test_img_p}) + for i_test in range(N_TEST): + util.save_image(output[i_test], os.path.join(OUT_DIR, 'dev_%d_%d.png' % (i_test, i))) + last_output_PATH[i_test] = os.path.join(OUT_DIR, 'dev_%d_%d.png' % (i_test, i)) + # Also save the train image + output, = sess.run([G_sample], feed_dict={DG_X: train_img, G_Z: train_img_p}) + util.save_image(output[0], os.path.join(OUT_DIR, 'train%d.png' % i)) + print('Iteration [%d/%d]:' % (i, N_ITERS)) + if G_MSE_loss_curr is not None: + print('\tG_MSE_loss (train) = %f' % G_MSE_loss_curr) + if G_MSE_loss_curr_dev is not None: + print('\tG_MSE_loss (dev) = %f' % G_MSE_loss_curr_dev) + if G_loss_curr is not None: + print('\tG_loss = %f' % G_loss_curr) + if C_loss_curr is not None: + print('\tC_loss = %f' % C_loss_curr) + + # Keep track of losses for logging + if G_MSE_loss_curr is not None: + train_MSE_loss.append([i, G_MSE_loss_curr]) + if G_MSE_loss_curr_dev is not None: + dev_MSE_loss.append([i, G_MSE_loss_curr_dev]) + + # Save the model every so often + if i % INTV_SAVE == 0: + save_path = saver.save(sess, os.path.join(MODEL_DIR, 'model%d.ckpt' % i)) + print('Model saved in path: %s' % save_path) + + # Save the loss every so often + if i % INTV_SAVE == 0: + np.savez(os.path.join(OUT_DIR, 'loss.npz'), train_MSE_loss=np.array(train_MSE_loss), dev_MSE_loss=np.array(dev_MSE_loss)) + +# Save the loss +np.savez(os.path.join(OUT_DIR, 'loss.npz'), train_MSE_loss=np.array(train_MSE_loss), dev_MSE_loss=np.array(dev_MSE_loss)) +# Save the final blended output, and make a graph of the loss. 
+util.plot_loss(os.path.join(OUT_DIR, 'loss.npz'), 'MSE Loss During Training', os.path.join(OUT_DIR, 'loss_plot.png')) +for i_test in range(N_TEST): + util.postprocess_images_outpainting(os.path.join(OUT_DIR, 'test_img_%d.png' % i_test), last_output_PATH[i_test], os.path.join(OUT_DIR, 'out_paste_%d.png' % i_test), blend=False) + util.postprocess_images_outpainting(os.path.join(OUT_DIR, 'test_img_%d.png' % i_test), last_output_PATH[i_test], os.path.join(OUT_DIR, 'out_blend_%d.png' % i_test), blend=True) diff --git a/src/.ipynb_checkpoints/train_ld-checkpoint.py b/src/.ipynb_checkpoints/train_ld-checkpoint.py new file mode 100644 index 0000000..7a1b4cf --- /dev/null +++ b/src/.ipynb_checkpoints/train_ld-checkpoint.py @@ -0,0 +1,200 @@ +# proj: image-outpainting +# file: train_ld.py +# authors: Mark Sabini, Gili Rusak +# desc: Train the model specified in model_ld.py, which +# uses both global and local discriminators. +# ------------------------------------------------------------- +import tensorflow as tf +import numpy as np +from PIL import Image +import model_ld as model +import util +import os +import sys + +tf.reset_default_graph() + +# Places365 Training Hyperparameters +BATCH_SZ = 16 +VERBOSE = False +EPSILON = 1e-9 +IMAGE_SZ = 128 +OUT_DIR = 'output' +MODEL_DIR = os.path.join(OUT_DIR, 'models') +INFO_PATH = os.path.join(OUT_DIR, 'run.txt') +N_TEST = 10 +N_ITERS = 64000 +N_ITERS_P1 = 20000 # How many iterations to train in phase 1 +N_ITERS_P2 = 4000 # How many iterations to train in phase 2 +INTV_PRINT = 200 # How often to print +INTV_SAVE = 1000 # How often to save the model +ALPHA = 0.0004 + +''' +# City Training Hyperparameters +BATCH_SZ = 1 +VERBOSE = False +EPSILON = 1e-9 +IMAGE_SZ = 128 +OUT_DIR = 'output' +MODEL_DIR = os.path.join(OUT_DIR, 'models') +INFO_PATH = os.path.join(OUT_DIR, 'run.txt') +N_TEST = 1 +N_ITERS = 5000 +N_ITERS_P1 = 1000 # How many iterations to train in phase 1 +N_ITERS_P2 = 400 # How many iterations to train in phase 2 +INTV_PRINT 
= 50 # How often to print +INTV_SAVE = 10000 # How often to save the model +ALPHA = 0.0004 +''' + +# Check that we don't clobber a pre-existing run +if len(sys.argv) < 2 and os.path.isdir(OUT_DIR) and len(os.listdir(OUT_DIR)) > 2: + print('Warning, OUT_DIR already exists. Aborting.') + exit() + +# Load in a model if specified as the second argument. +start_iter = 0 +model_filename = None +if len(sys.argv) >= 2: + start_iter = int(sys.argv[1]) + model_filename = os.path.join(MODEL_DIR, 'model%d.ckpt' % start_iter) + +# Generator code +G_Z = tf.placeholder(tf.float32, shape=[None, IMAGE_SZ, IMAGE_SZ, 4], name='G_Z') +DG_X = tf.placeholder(tf.float32, shape=[None, IMAGE_SZ, IMAGE_SZ, 3], name='DG_X') + +# Load Places365 data +data = np.load('places/places_128.npz') +imgs = data['imgs_train'] # Originally from http://data.csail.mit.edu/places/places365/val_256.tar +imgs_p = util.preprocess_images_outpainting(imgs) + +test_imgs = data['imgs_test'] +test_imgs_p = util.preprocess_images_outpainting(test_imgs) + +test_img = test_imgs[:N_TEST] +test_img_p = test_imgs_p[:N_TEST] + +train_img = imgs[4, np.newaxis] +train_img_p = imgs_p[4, np.newaxis] + +''' +# Load city image data +imgs = util.load_city_image() +imgs_p = util.preprocess_images_outpainting(imgs) + +test_imgs = util.load_city_image() +test_imgs_p = util.preprocess_images_outpainting(test_imgs) + +test_img = test_imgs +test_img_p = test_imgs_p + +train_img = imgs +train_img_p = imgs_p +''' + +# Write training and testing sample ground truths as reference +util.save_image(train_img[0], os.path.join(OUT_DIR, 'train_img.png')) +for i_test in range(N_TEST): + util.save_image(test_imgs[i_test], os.path.join(OUT_DIR, 'test_img_%d.png' % i_test)) + +G_sample = model.generator(G_Z) +vars_G = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='G') + +C_real = model.concatenator(model.global_discriminator(DG_X), model.local_discriminator(DG_X[:, :, :IMAGE_SZ // 2, :]), model.local_discriminator(tf.reverse(DG_X[:, 
:, -IMAGE_SZ // 2:, :], axis=[2]))) +C_fake = model.concatenator(model.global_discriminator(G_sample), model.local_discriminator(G_sample[:, :, :IMAGE_SZ // 2, :]), model.local_discriminator(tf.reverse(G_sample[:, :, -IMAGE_SZ // 2:, :], axis=[2]))) +vars_DG = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='DG') +vars_DL = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='DL') +vars_C = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='C') + +C_loss = -tf.reduce_mean(tf.log(tf.maximum(C_real, EPSILON)) + tf.log(tf.maximum(1. - C_fake, EPSILON))) +G_MSE_loss = tf.losses.mean_squared_error(G_sample, DG_X, weights=tf.expand_dims(G_Z[:,:,:,3], -1)) # TODO: MULTIPLY with mask. Actually see if we want to remove this. +G_loss = G_MSE_loss - ALPHA * tf.reduce_mean(tf.log(tf.maximum(C_fake, EPSILON))) + +C_solver = tf.train.AdamOptimizer().minimize(C_loss, var_list=(vars_DG + vars_DL + vars_C)) +G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=vars_G) +G_MSE_solver = tf.train.AdamOptimizer().minimize(G_MSE_loss, var_list=vars_G) + +train_MSE_loss = [] +dev_MSE_loss = [] + +last_output_PATH = [None] * N_TEST + +assert N_ITERS > N_ITERS_P1 + N_ITERS_P2 + +# Saver to save the session +saver = tf.train.Saver() + +with tf.Session() as sess: + if model_filename is None: + sess.run(tf.global_variables_initializer()) + else: + saver.restore(sess, model_filename) + for i in range(start_iter, N_ITERS + 1): + batch, batch_p = util.sample_random_minibatch(imgs, imgs_p, BATCH_SZ) + G_sample_ = None + C_loss_curr, G_loss_curr, G_MSE_loss_curr = None, None, None + if i < N_ITERS_P1: # Stage 1 - Train Generator Only + if i == 0: + print('------------------> Beginning Phase 1...') + _, G_MSE_loss_curr, G_sample_ = sess.run([G_MSE_solver, G_MSE_loss, G_sample], feed_dict={DG_X: batch, G_Z: batch_p}) + elif i < N_ITERS_P1 + N_ITERS_P2: # Stage 2 - Train Discriminator Only + if i == N_ITERS_P1: + print('------------------> Beginning Phase 2...') + _, 
C_loss_curr, C_real_, C_fake_ = sess.run([C_solver, C_loss, C_real, C_fake], feed_dict={DG_X: batch, G_Z: batch_p}) + if VERBOSE: + print((i, C_loss_curr, np.min(C_real_), np.max(C_real_), np.min(C_fake_), np.max(C_fake_))) + else: # Stage 3 - Train both Generator and Discriminator + if i == N_ITERS_P1 + N_ITERS_P2: + print('------------------> Beginning Phase 3...') + _, C_loss_curr, C_real_, C_fake_ = sess.run([C_solver, C_loss, C_real, C_fake], feed_dict={DG_X: batch, G_Z: batch_p}) + if VERBOSE: + print((i, C_loss_curr, 'D', np.min(C_real_), np.max(C_real_), np.min(C_fake_), np.max(C_fake_))) + _, G_loss_curr, G_MSE_loss_curr, G_sample_, C_fake_ = sess.run([G_solver, G_loss, G_MSE_loss, G_sample, C_fake], feed_dict={DG_X: batch, G_Z: batch_p}) + if VERBOSE: + print((i, G_loss_curr, 'G', np.min(C_fake_), np.max(C_fake_))) + + # Periodically test the generator on held-out images + if i % INTV_PRINT == 0: + G_MSE_loss_curr_dev = None + if G_sample_ is not None: + # Print out the dev image + output, G_MSE_loss_curr_dev = sess.run([G_sample, G_MSE_loss], feed_dict={DG_X: test_img, G_Z: test_img_p}) + for i_test in range(N_TEST): + util.save_image(output[i_test], os.path.join(OUT_DIR, 'dev_%d_%d.png' % (i_test, i))) + last_output_PATH[i_test] = os.path.join(OUT_DIR, 'dev_%d_%d.png' % (i_test, i)) + # Also save the train image + output, = sess.run([G_sample], feed_dict={DG_X: train_img, G_Z: train_img_p}) + util.save_image(output[0], os.path.join(OUT_DIR, 'train%d.png' % i)) + print('Iteration [%d/%d]:' % (i, N_ITERS)) + if G_MSE_loss_curr is not None: + print('\tG_MSE_loss (train) = %f' % G_MSE_loss_curr) + if G_MSE_loss_curr_dev is not None: + print('\tG_MSE_loss (dev) = %f' % G_MSE_loss_curr_dev) + if G_loss_curr is not None: + print('\tG_loss = %f' % G_loss_curr) + if C_loss_curr is not None: + print('\tC_loss = %f' % C_loss_curr) + + # Keep track of losses for logging + if G_MSE_loss_curr is not None: + train_MSE_loss.append([i, G_MSE_loss_curr]) + if 
G_MSE_loss_curr_dev is not None: + dev_MSE_loss.append([i, G_MSE_loss_curr_dev]) + + # Save the model every so often + if i % INTV_SAVE == 0 and i > 0: + save_path = saver.save(sess, os.path.join(MODEL_DIR, 'model%d.ckpt' % i)) + print('Model saved in path: %s' % save_path) + + # Save the loss every so often + if i % INTV_SAVE == 0: + np.savez(os.path.join(OUT_DIR, 'loss.npz'), train_MSE_loss=np.array(train_MSE_loss), dev_MSE_loss=np.array(dev_MSE_loss)) + +# Save the loss +np.savez(os.path.join(OUT_DIR, 'loss.npz'), train_MSE_loss=np.array(train_MSE_loss), dev_MSE_loss=np.array(dev_MSE_loss)) +# Save the final blended output, and make a graph of the loss. +util.plot_loss(os.path.join(OUT_DIR, 'loss.npz'), 'MSE Loss During Training', os.path.join(OUT_DIR, 'loss_plot.png')) +for i_test in range(N_TEST): + util.postprocess_images_outpainting(os.path.join(OUT_DIR, 'test_img_%d.png' % i_test), last_output_PATH[i_test], os.path.join(OUT_DIR, 'out_paste_%d.png' % i_test), blend=False) + util.postprocess_images_outpainting(os.path.join(OUT_DIR, 'test_img_%d.png' % i_test), last_output_PATH[i_test], os.path.join(OUT_DIR, 'out_blend_%d.png' % i_test), blend=True) diff --git a/src/.ipynb_checkpoints/util-checkpoint.py b/src/.ipynb_checkpoints/util-checkpoint.py new file mode 100644 index 0000000..aea9dfa --- /dev/null +++ b/src/.ipynb_checkpoints/util-checkpoint.py @@ -0,0 +1,274 @@ +# proj: image-outpainting +# file: util.py +# authors: Mark Sabini, Gili Rusak +# desc: Various utility functions for all sorts of things. +# ------------------------------------------------------------- +import numpy as np +from PIL import Image +import scipy.misc +import matplotlib.pyplot as plt +import cv2 +import os +import re +import imageio + +IMAGE_SZ = 128 # Should be a power of 2 + +# Loads the city image. 
# Returns: normalized numpy array of size (1, IMAGE_SZ, IMAGE_SZ, 3)
def load_city_image():
    """Load 'images/city_128.png' and center-crop it to IMAGE_SZ x IMAGE_SZ.

    Returns:
        Array of shape (1, IMAGE_SZ, IMAGE_SZ, 3) with values divided by 255.
    """
    # convert() returns a fully loaded copy, so the file can be closed right away.
    with Image.open('images/city_128.png') as raw:
        im = raw.convert('RGB')
    width, height = im.size
    left = (width - IMAGE_SZ) / 2
    top = (height - IMAGE_SZ) / 2
    im = im.crop((left, top, left + IMAGE_SZ, top + IMAGE_SZ))
    pix = np.array(im)
    assert pix.shape == (IMAGE_SZ, IMAGE_SZ, 3)
    return pix[np.newaxis] / 255.0  # Need to normalize images to [0, 1]

# Loads multiple images from a directory.
# Returns: normalized numpy array of size (m, IMAGE_SZ, IMAGE_SZ, 3)
def load_images(in_PATH, verbose=False):
    """Read every file in in_PATH (sorted by name) as an RGB image.

    Args:
        in_PATH: directory whose entries are all opened with PIL.
        verbose: when true, print each filename before it is read.

    Returns:
        np.array of the per-image arrays, each divided by 255. The images are
        not resized here, so they presumably must already share one size.
    """
    root = os.path.abspath(in_PATH)
    imgs = []
    for filename in sorted(os.listdir(in_PATH)):
        if verbose:
            print('Processing %s' % filename)
        # Close each file as soon as its pixels are copied out.
        with Image.open(os.path.join(root, filename)) as img:
            pix = np.array(img.convert('RGB'))
        imgs.append(pix / 255.0)
    return np.array(imgs)

# Reads in all the images in a directory and saves them to an .npy file.
def compile_images(in_PATH, out_PATH):
    """Load all images under in_PATH and store the stacked array with np.save."""
    imgs = load_images(in_PATH, verbose=True)
    np.save(out_PATH, imgs)

# Masks and preprocesses an (m, IMAGE_SZ, IMAGE_SZ, 3) batch of images for image outpainting.
# Returns: numpy array of size (m, IMAGE_SZ, IMAGE_SZ, 4)
def preprocess_images_outpainting(imgs, crop=True):
    """Append a mask channel and, optionally, blank out the side strips.

    Args:
        imgs: batch of shape (m, IMAGE_SZ, IMAGE_SZ, 3); it is copied, not
            modified in place.
        crop: when true, the left and right strips (each IMAGE_SZ / 4 wide)
            are overwritten with that image's mean pixel value.

    Returns:
        Array of shape (m, IMAGE_SZ, IMAGE_SZ, 4); channel 3 is 1.0 on the
        side strips and 0.0 elsewhere (set regardless of crop).
    """
    m = imgs.shape[0]
    imgs = np.array(imgs, copy=True)
    pix_avg = np.mean(imgs, axis=(1, 2, 3))
    strip = int(2 * IMAGE_SZ / 8)  # Width of each side strip
    if crop:
        fill = pix_avg[:, np.newaxis, np.newaxis, np.newaxis]
        imgs[:, :, :strip, :] = fill
        imgs[:, :, -strip:, :] = fill
    mask = np.zeros((m, IMAGE_SZ, IMAGE_SZ, 1))
    mask[:, :, :strip, :] = 1.0
    mask[:, :, -strip:, :] = 1.0
    return np.concatenate((imgs, mask), axis=3)

# Expands and preprocesses a single (h, w, 3) image for image outpainting.
# Returns: numpy array of size (h, w + 2 * dw, 4)
def preprocess_images_gen(img):
    """Pad an image on both sides with its mean value and append a mask channel.

    Args:
        img: single image of shape (h, w, c); it is copied, not modified.

    Returns:
        Array of shape (1, h, w + 2 * dw, c + 1) where dw = IMAGE_SZ / 4. The
        last channel is 1.0 on the two padded strips and 0.0 over the image.
    """
    img = np.array(img, copy=True)
    pix_avg = np.mean(img)
    dw = int(2 * IMAGE_SZ / 8)  # Amount that will be outpainted on each side
    img_expand = np.ones((img.shape[0], img.shape[1] + 2 * dw, img.shape[2])) * pix_avg
    img_expand[:, dw:-dw, :] = img
    mask = np.zeros((img_expand.shape[0], img_expand.shape[1], 1))
    mask[:, :dw, :] = 1.0
    mask[:, -dw:, :] = 1.0
    img_p = np.concatenate((img_expand, mask), axis=2)
    return img_p[np.newaxis]

# Renormalizes an image to [0, 255].
def norm_image(img_r):
    """Scale a [0, 1] float image to uint8.

    Values are clipped to [0, 1] first: casting an out-of-range float to
    uint8 wraps around (or is undefined for negatives) instead of saturating.
    In-range inputs give the same result as a plain scale-and-cast, which
    truncates toward zero.
    """
    return (np.clip(img_r, 0.0, 1.0) * 255.0).astype(np.uint8)

# Visualize an image.
def vis_image(img_r, mode='RGB'):
    """Convert img_r with norm_image and open it in PIL's viewer."""
    img_norm = norm_image(img_r)
    img = Image.fromarray(img_norm, mode)
    img.show()

# Save an image as a .png file.
def save_image(img_r, name, mode='RGB'):
    """Convert img_r with norm_image and write it to name in PNG format."""
    img_norm = norm_image(img_r)
    img = Image.fromarray(img_norm, mode)
    img.save(name, format='PNG')

# Sample a random minibatch from data.
# Returns: Two numpy arrays, representing examples and their corresponding
# preprocessed arrays.
def sample_random_minibatch(data, data_p, m):
    """Draw m row indices uniformly (with replacement) and index both arrays.

    The same indices are applied to data and data_p, so row k of the first
    result corresponds to row k of the second.
    """
    indices = np.random.randint(0, data.shape[0], m)
    return data[indices], data_p[indices]

# Plots the loss and saves the plot.
def _finish_loss_plot(handles, ylabel, title, out_filename):
    # Shared tail of the loss plots: legend, labels, save, then clear the figure.
    plt.legend(handles=handles)
    plt.xlabel('Iteration')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.savefig(out_filename)
    plt.clf()

def plot_loss(loss_filename, title, out_filename):
    """Plot training and dev MSE loss from an .npz file and save the figure.

    Args:
        loss_filename: .npz file holding 'train_MSE_loss' and 'dev_MSE_loss',
            each a sequence of [iteration, loss] rows.
        title: plot title.
        out_filename: path passed to plt.savefig.

    Raises:
        KeyError: if either array is missing from the file.
    """
    with np.load(loss_filename) as loss:
        for key in ('train_MSE_loss', 'dev_MSE_loss'):
            if key not in loss:
                raise KeyError('%s not found in %s' % (key, loss_filename))
        # reshape(-1, 2) keeps (k, 2) data as is and turns an empty series
        # (saved with shape (0,)) into (0, 2) so the column indexing works.
        train_MSE_loss = loss['train_MSE_loss'].reshape(-1, 2)
        dev_MSE_loss = loss['dev_MSE_loss'].reshape(-1, 2)  # TODO: Deal with dev_MSE_loss not changing during Phase 2
    label_train, = plt.plot(train_MSE_loss[:, 0], train_MSE_loss[:, 1], label='Training MSE loss')
    label_dev, = plt.plot(dev_MSE_loss[:, 0], dev_MSE_loss[:, 1], label='Dev MSE loss')
    _finish_loss_plot([label_train, label_dev], 'MSE Loss', title, out_filename)

# Plots the loss and saves the plot, but fancier.
def plot_loss2(loss_filename, title, out_filename):
    """Plot MSE, generator and discriminator losses and save the figure.

    Args:
        loss_filename: .npz file with value arrays 'train_MSE_loss',
            'dev_MSE_loss', 'G_loss', 'D_loss' and matching iteration arrays
            prefixed with 'i' (e.g. 'itrain_MSE_loss').
        title: plot title.
        out_filename: path passed to plt.savefig.
    """
    with np.load(loss_filename) as loss:
        itrain_MSE_loss, train_MSE_loss = loss['itrain_MSE_loss'], loss['train_MSE_loss']
        idev_MSE_loss, dev_MSE_loss = loss['idev_MSE_loss'], loss['dev_MSE_loss']
        iG_loss, G_loss = loss['iG_loss'], loss['G_loss']
        iD_loss, D_loss = loss['iD_loss'], loss['D_loss']
    label_train, = plt.plot(itrain_MSE_loss, train_MSE_loss, label='Training MSE loss')
    label_dev, = plt.plot(idev_MSE_loss, dev_MSE_loss, label='Dev MSE loss')
    label_G, = plt.plot(iG_loss, G_loss, label='Generator loss')
    label_D, = plt.plot(iD_loss, D_loss, label='Discriminator loss')
    _finish_loss_plot([label_train, label_dev, label_G, label_D], 'Loss', title, out_filename)

# Use seamless cloning to improve the generator's output.
def postprocess_images_outpainting(img_PATH, img_o_PATH, out_PATH, blend=False):
    """Paste the center of a ground-truth image onto the generator output.

    Args:
        img_PATH: ground-truth image; only its center columns (everything
            except IMAGE_SZ / 4 on each side) are used.
        img_o_PATH: generator output the center is pasted onto.
        out_PATH: where the combined image is written.
        blend: use cv2.seamlessClone when true, otherwise a plain overwrite.

    Raises:
        FileNotFoundError: if cv2.imread cannot read either input.
    """
    dw = int(2 * IMAGE_SZ / 8)
    truth = cv2.imread(img_PATH)
    dst = cv2.imread(img_o_PATH)
    # cv2.imread signals failure by returning None rather than raising.
    if truth is None:
        raise FileNotFoundError('Could not read image: %s' % img_PATH)
    if dst is None:
        raise FileNotFoundError('Could not read image: %s' % img_o_PATH)
    src = truth[:, dw:-dw, :]
    if blend:
        mask = np.ones(src.shape, src.dtype) * 255
        center = (int(IMAGE_SZ / 2) - 1, int(IMAGE_SZ / 2) - 1)
        out = cv2.seamlessClone(src, dst, mask, center, cv2.NORMAL_CLONE)
    else:
        out = dst.copy()
        out[:, dw:-dw, :] = src
    cv2.imwrite(out_PATH, out)

# Use seamless cloning to improve the generator's output.
def postprocess_images_gen(img, img_o, blend=False):
    """Paste img into the center of img_o, working on in-memory arrays.

    Args:
        img: center image, shape (h, w, 3).
        img_o: wider image, shape (h, w + 2 * dw, 3) with dw = IMAGE_SZ / 4.
        blend: use cv2.seamlessClone when true, otherwise a plain overwrite.

    Returns:
        A new array shaped like img_o, with the same channel order as the inputs.
    """
    # Channels are reversed for the OpenCV call and reversed back on return.
    src = img[:, :, ::-1].copy()
    dst = img_o[:, :, ::-1].copy()
    if blend:
        mask = np.ones(src.shape, src.dtype) * 255
        center = (int(dst.shape[1] / 2) - 1, int(dst.shape[0] / 2) - 1)
        out = cv2.seamlessClone(src, dst, mask, center, cv2.NORMAL_CLONE)
    else:
        dw = int(2 * IMAGE_SZ / 8)
        out = dst.copy()
        out[:, dw:-dw, :] = src
    return out[:, :, ::-1].copy()

# Crop and resize all the images in a directory.
def resize_images(src_PATH, dst_PATH):
    """Center-crop each image in src_PATH to a square and resize to IMAGE_SZ.

    Results are written in PNG format to dst_PATH under the original filename.
    """
    src_root = os.path.abspath(src_PATH)
    dst_root = os.path.abspath(dst_PATH)
    for filename in os.listdir(src_PATH):
        print('Processing %s' % filename)
        with Image.open(os.path.join(src_root, filename)) as handle:
            img_raw = handle.convert('RGB')
        w, h = img_raw.size
        # Square crop along the longer axis; the offset on the shorter axis is 0.
        dim = min(w, h)
        x_start = int((w - dim) / 2)
        y_start = int((h - dim) / 2)
        img_crop = img_raw.crop(box=(x_start, y_start, x_start + dim, y_start + dim))
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        img_scale = img_crop.resize((IMAGE_SZ, IMAGE_SZ), Image.LANCZOS)
        img_scale.save(os.path.join(dst_root, filename), format='PNG')

# Parse the output of train.py to extract the various losses.
def parse_log(in_PATH, out_PATH):
    """Extract the loss curves from a captured training log into an .npz file.

    The first line of the log is skipped, as are blank lines and lines
    starting with '----' or 'Model'. The remaining lines are grouped into
    entries, each beginning at an 'Iteration [i/N]:' line. An entry's length
    decides which losses it carries: 3 lines = train/dev MSE, 2 lines =
    discriminator loss, 5 lines = train/dev MSE, generator and discriminator
    loss.

    Once a loss has been seen, its latest value is recorded again for every
    later entry, including entries that do not report it.

    Args:
        in_PATH: text log to read.
        out_PATH: destination passed to np.savez. It stores train_MSE_loss,
            dev_MSE_loss, G_loss, D_loss and the matching iteration arrays
            itrain_MSE_loss, idev_MSE_loss, iG_loss, iD_loss.
    """
    loss_pattern = re.compile(r'= ([\d, .]+)')
    iter_pattern = re.compile(r'\[(\d+)/')

    def extract_loss(text):
        return float(loss_pattern.findall(text)[0])

    # Group the log lines into one list per 'Iteration' header.
    data = []
    curr_list = []
    with open(in_PATH, 'r') as fp:
        for i, line in enumerate(fp):
            if i == 0:
                continue
            line = line.strip()
            if not line or line.startswith(('----', 'Model')):
                continue
            if line.startswith('Iteration') and curr_list:
                data.append(curr_list)
                curr_list = []
            curr_list.append(line)
    if curr_list:
        data.append(curr_list)

    G_MSE_train, G_MSE_dev, G, C = None, None, None, None
    G_MSE_train_s, G_MSE_dev_s, G_s, C_s = [], [], [], []
    G_MSE_train_is, G_MSE_dev_is, G_is, C_is = [], [], [], []
    for entry in data:
        i = int(iter_pattern.findall(entry[0])[0])
        if len(entry) == 3:  # Phase 1
            G_MSE_train = extract_loss(entry[1])
            G_MSE_dev = extract_loss(entry[2])
        elif len(entry) == 2:  # Phase 2
            C = extract_loss(entry[1])
        elif len(entry) == 5:  # Phase 3
            G_MSE_train = extract_loss(entry[1])
            G_MSE_dev = extract_loss(entry[2])
            G = extract_loss(entry[3])
            C = extract_loss(entry[4])
        # Values are not reset between entries, so the last seen value carries forward.
        if G_MSE_train is not None:
            G_MSE_train_s.append(G_MSE_train)
            G_MSE_train_is.append(i)
        if G_MSE_dev is not None:
            G_MSE_dev_s.append(G_MSE_dev)
            G_MSE_dev_is.append(i)
        if G is not None:
            G_s.append(G)
            G_is.append(i)
        if C is not None:
            C_s.append(C)
            C_is.append(i)

    np.savez(out_PATH,
             train_MSE_loss=np.array(G_MSE_train_s), dev_MSE_loss=np.array(G_MSE_dev_s),
             G_loss=np.array(G_s), D_loss=np.array(C_s),
             itrain_MSE_loss=np.array(G_MSE_train_is), idev_MSE_loss=np.array(G_MSE_dev_is),
             iG_loss=np.array(G_is), iD_loss=np.array(C_is))

# Smoothes the MSE loss in the output loss file to make plotting easier.
def smooth_MSE_loss(loss_file, window_size, outfile):
    """Average the training MSE loss over consecutive windows.

    Args:
        loss_file: .npz file with 'train_MSE_loss' ([iteration, loss] rows)
            and 'dev_MSE_loss'.
        window_size: number of rows per window.
        outfile: destination passed to np.savez; 'dev_MSE_loss' is copied
            through unchanged.

    Each window contributes one [mean iteration, mean loss] row. The last two
    windows are dropped, which also removes any short final window (its sums
    are still divided by the full window_size).
    """
    with np.load(loss_file) as losses:
        train = losses['train_MSE_loss']
        dev = losses['dev_MSE_loss']
    num_train = train.shape[0]
    divisor = float(window_size)
    new_train_list = []
    for i in range(0, num_train, window_size):
        window = train[i:i + window_size]
        new_train_list.append([np.sum(window[:, 0]) / divisor, np.sum(window[:, 1]) / divisor])
    np_train = np.array(new_train_list[:-2])
    np.savez(outfile, train_MSE_loss=np_train, dev_MSE_loss=dev)

# Create a GIF to enable visualization of generator outputs over the course of training.
def create_GIF(in_PATH, prefix, out_PATH, max_index=227400, step=200):
    """Assemble saved generator outputs into an animated GIF.

    Args:
        in_PATH: directory holding frames named '<prefix><index>.png'.
        prefix: filename prefix of the frames.
        out_PATH: GIF file to write.
        max_index: last iteration index to look for (inclusive).
        step: spacing between iteration indices.

    Frames that cannot be read are skipped. The GIF uses the first 50 frames,
    then every 10th frame after that, then the final frame once more.

    Raises:
        ValueError: if no frame could be read.
    """
    root = os.path.abspath(in_PATH)
    images = []
    for index in range(0, max_index + 1, step):
        full_filename = os.path.join(root, prefix + str(index) + '.png')
        try:
            images.append(imageio.imread(full_filename))
        except Exception:
            # Best effort: missing or unreadable frames are skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            continue
    if not images:
        raise ValueError('No frames matching %r found in %s' % (prefix, in_PATH))
    images = images[:50] + images[50::10] + [images[-1]]
    imageio.mimwrite(out_PATH, images, loop=1, duration=0.1)

# Compute the RMSE between a ground truth and outpainted image.
+def compute_RMSE(image_gt_PATH, image_o_PATH): + im_gt = np.array(Image.open(image_gt_PATH).convert('RGB')).astype(np.float64) + im_o = np.array(Image.open(image_o_PATH).convert('RGB')).astype(np.float64) + assert im_gt.shape == (128, 128, 3) + assert im_o.shape == (128, 128, 3) + M = np.ones((128, 128, 3)) + M[:, 32:96, :] = 0 + num_pixels = 128 * 64 * 3 + return np.sqrt(np.sum(((im_gt - im_o) * M) ** 2) / num_pixels) diff --git a/src/Untitled.ipynb b/src/Untitled.ipynb new file mode 100644 index 0000000..6966702 --- /dev/null +++ b/src/Untitled.ipynb @@ -0,0 +1,130 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "ea42a489", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-06-27 17:55:39.372674: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory\n", + "2022-06-27 17:55:39.372709: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "non-resource variables are not supported in the long term\n", + "Imported model (for Places365, 128x128 images)\n" + ] + } + ], + "source": [ + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "tf.reset_default_graph()\n", + "import numpy as np\n", + "from PIL import Image\n", + "import model\n", + "import util\n", + "import os\n", + "import sys" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "24a28be9", + "metadata": {}, + "outputs": [], + 
"source": [ + "model_PATH='/home/jovyan/work/src/output/models/model2000.ckpt'" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "2f04ed3d", + "metadata": {}, + "outputs": [], + "source": [ + "def load_demo_image(in_PATH):\n", + " img = np.array(Image.open(in_PATH).convert('RGB'))[np.newaxis] / 255.0\n", + " img_p = util.preprocess_images_outpainting(img)\n", + " return img_p" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4cc717e4", + "metadata": {}, + "outputs": [], + "source": [ + "def inference(model_PATH, img_p):\n", + " G_Z = tf.placeholder(tf.float32, shape=[None, IMAGE_SZ, IMAGE_SZ, 4], name='G_Z')\n", + " G_sample = model.generator(G_Z)\n", + " \n", + " saver = tf.train.Saver()\n", + " with tf.Session() as sess:\n", + " saver.restore(sess, model_PATH)\n", + " output, = sess.run([G_sample], feed_dict={G_Z: img_p})\n", + " img_norm = (output[0] * 255.0).astype(np.uint8)\n", + " img = Image.fromarray(img_norm, 'RGB')\n", + " #util.save_image(output[0], out_PATH)\n", + " return img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ce2fc25", + "metadata": {}, + "outputs": [], + "source": [ + "def handle(conf):\n", + " \"\"\"\n", + " 该方法是部署之后,其他人调用你的服务时候的处理方法。\n", + " 请按规范填写参数结构,这样我们就能替你自动生成配置文件,方便其他人的调用。\n", + " 范例:\n", + " params['key'] = value # value_type: str # description: some description\n", + " value_type 可以选择:img, video, audio, str, int, float, [int], [str], [float]\n", + " 参数请放到params字典中,我们会自动解析该变量。\n", + " \"\"\"\n", + " base64_str = conf['Photo']\n", + " image = load_demo_image(base64_str, image_size, device)\n", + " res = inference(model, image)\n", + " # add your code\n", + " return {'Output': res}\n", + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", 
+ "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/model.py b/src/model.py index 1719a09..77b1317 100644 --- a/src/model.py +++ b/src/model.py @@ -4,7 +4,8 @@ # desc: Model for outpainting on 128x128 images with only # a global discriminator. # ------------------------------------------------------------- -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() print('Imported model (for Places365, 128x128 images)') diff --git a/src/output/models/checkpoint b/src/output/models/checkpoint new file mode 100644 index 0000000..f393a80 --- /dev/null +++ b/src/output/models/checkpoint @@ -0,0 +1,2 @@ +model_checkpoint_path: "model0.ckpt" +all_model_checkpoint_paths: "model0.ckpt" diff --git a/src/output/models/model0.ckpt.data-00000-of-00001 b/src/output/models/model0.ckpt.data-00000-of-00001 new file mode 100644 index 0000000..9779b1f Binary files /dev/null and b/src/output/models/model0.ckpt.data-00000-of-00001 differ diff --git a/src/output/models/model0.ckpt.index b/src/output/models/model0.ckpt.index new file mode 100644 index 0000000..337ee51 Binary files /dev/null and b/src/output/models/model0.ckpt.index differ diff --git a/src/output/models/model0.ckpt.meta b/src/output/models/model0.ckpt.meta new file mode 100644 index 0000000..7771612 Binary files /dev/null and b/src/output/models/model0.ckpt.meta differ diff --git a/src/output/models/model2000.ckpt.data-00000-of-00001 b/src/output/models/model2000.ckpt.data-00000-of-00001 new file mode 100644 index 0000000..e34d579 Binary files /dev/null and b/src/output/models/model2000.ckpt.data-00000-of-00001 differ diff --git a/src/output/models/model2000.ckpt.index b/src/output/models/model2000.ckpt.index new file mode 100644 index 0000000..af7cd69 Binary files /dev/null and b/src/output/models/model2000.ckpt.index differ diff --git 
a/src/output/models/model2000.ckpt.meta b/src/output/models/model2000.ckpt.meta new file mode 100644 index 0000000..f540352 Binary files /dev/null and b/src/output/models/model2000.ckpt.meta differ diff --git a/src/test.py b/src/test.py index 6dd2f6b..7767aae 100644 --- a/src/test.py +++ b/src/test.py @@ -5,7 +5,8 @@ # the sides of an image, feeds it through the network, and # compares the network output to the original image. # ------------------------------------------------------------- -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np from PIL import Image import model @@ -13,11 +14,9 @@ import os import sys -if len(sys.argv) != 4: - print('Usage: python test.py [model_PATH] [in_PATH] [out_PATH]') - exit() - -_, model_PATH, in_PATH, out_PATH = sys.argv +model_PATH='/home/jovyan/work/src/output/models/model2000.ckpt' +in_PATH='/home/jovyan/work/images/test.png' +out_PATH='/home/jovyan/work/results/test_output.png' tf.reset_default_graph() diff --git a/src/train.py b/src/train.py index 608e0ae..c1ff616 100644 --- a/src/train.py +++ b/src/train.py @@ -4,15 +4,16 @@ # desc: Train the model specified in model.py, which only # uses a global discriminator. # ------------------------------------------------------------- -import tensorflow as tf +import tensorflow.compat.v1 as tf import numpy as np from PIL import Image import model import util import os import sys - -tf.reset_default_graph() +from tensorflow.python.framework import ops +tf.disable_eager_execution() +ops.reset_default_graph() # Places365 Training Hyperparameters BATCH_SZ = 16 @@ -59,6 +60,8 @@ if len(sys.argv) >= 2: start_iter = int(sys.argv[1]) model_filename = os.path.join(MODEL_DIR, 'model%d.ckpt' % start_iter) + + # Generator code G_Z = tf.placeholder(tf.float32, shape=[None, IMAGE_SZ, IMAGE_SZ, 4], name='G_Z')