You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
{{ message }}
This repository has been archived by the owner on Dec 19, 2024. It is now read-only.
Describe the Bug:
While training on 10k images generated on Unity Simulation, the process stops midway through the run.
How to Reproduce?
Launch a training job with 10k images for 25 epochs
Using the container gcr.io/unity-ai-thea-test/custom-datasetinsights:v2
What did you expect to happen:
The training job to finish.
Screenshots
Traceback:
Exception in thread Thread-38:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py", line 677, in urlopen
chunked=chunked,
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py", line 392, in _make_request
conn.request(method, url, **httplib_request_kw)
File "/usr/lib/python3.7/http/client.py", line 1252, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/usr/lib/python3.7/http/client.py", line 1298, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/usr/lib/python3.7/http/client.py", line 1247, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/usr/lib/python3.7/http/client.py", line 1065, in _send_output
self.send(chunk)
File "/usr/lib/python3.7/http/client.py", line 987, in send
self.sock.sendall(data)
File "/usr/lib/python3.7/ssl.py", line 1034, in sendall
v = self.send(byte_view[count:])
File "/usr/lib/python3.7/ssl.py", line 1003, in send
return self._sslobj.write(data)
BrokenPipeError: [Errno 32] Broken pipe
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/requests/adapters.py", line 449, in send
timeout=timeout
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py", line 727, in urlopen
method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
File "/usr/local/lib/python3.7/dist-packages/urllib3/util/retry.py", line 403, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/usr/local/lib/python3.7/dist-packages/urllib3/packages/six.py", line 734, in reraise
raise value.with_traceback(tb)
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py", line 677, in urlopen
chunked=chunked,
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py", line 392, in _make_request
conn.request(method, url, **httplib_request_kw)
File "/usr/lib/python3.7/http/client.py", line 1252, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/usr/lib/python3.7/http/client.py", line 1298, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/usr/lib/python3.7/http/client.py", line 1247, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/usr/lib/python3.7/http/client.py", line 1065, in _send_output
self.send(chunk)
File "/usr/lib/python3.7/http/client.py", line 987, in send
self.sock.sendall(data)
File "/usr/lib/python3.7/ssl.py", line 1034, in sendall
v = self.send(byte_view[count:])
File "/usr/lib/python3.7/ssl.py", line 1003, in send
return self._sslobj.write(data)
urllib3.exceptions.ProtocolError: ('Connection aborted.', BrokenPipeError(32, 'Broken pipe'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.7/threading.py", line 926, in _bootstrap_inner
self.run()
File "/usr/local/lib/python3.7/dist-packages/tensorboardX/event_file_writer.py", line 219, in run
self._record_writer.flush()
File "/usr/local/lib/python3.7/dist-packages/tensorboardX/event_file_writer.py", line 69, in flush
self._py_recordio_writer.flush()
File "/usr/local/lib/python3.7/dist-packages/tensorboardX/record_writer.py", line 187, in flush
self._writer.flush()
File "/usr/local/lib/python3.7/dist-packages/tensorboardX/record_writer.py", line 149, in flush
self.blob.upload_from_string(upload_buffer.getvalue())
File "/usr/local/lib/python3.7/dist-packages/google/cloud/storage/blob.py", line 1733, in upload_from_string
if_metageneration_not_match=if_metageneration_not_match,
File "/usr/local/lib/python3.7/dist-packages/google/cloud/storage/blob.py", line 1567, in upload_from_file
if_metageneration_not_match,
File "/usr/local/lib/python3.7/dist-packages/google/cloud/storage/blob.py", line 1420, in _do_upload
if_metageneration_not_match,
File "/usr/local/lib/python3.7/dist-packages/google/cloud/storage/blob.py", line 1098, in _do_multipart_upload
response = upload.transmit(transport, data, object_metadata, content_type)
File "/usr/local/lib/python3.7/dist-packages/google/resumable_media/requests/upload.py", line 106, in transmit
retry_strategy=self._retry_strategy,
File "/usr/local/lib/python3.7/dist-packages/google/resumable_media/requests/_helpers.py", line 136, in http_request
return _helpers.wait_and_retry(func, RequestsMixin._get_status_code, retry_strategy)
File "/usr/local/lib/python3.7/dist-packages/google/resumable_media/_helpers.py", line 150, in wait_and_retry
response = func()
File "/usr/local/lib/python3.7/dist-packages/google/auth/transport/requests.py", line 470, in request
**kwargs
File "/usr/local/lib/python3.7/dist-packages/requests/sessions.py", line 530, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python3.7/dist-packages/requests/sessions.py", line 643, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/requests/adapters.py", line 498, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', BrokenPipeError(32, 'Broken pipe'))
Exception in thread Thread-65:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py", line 677, in urlopen
chunked=chunked,
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py", line 426, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py", line 421, in _make_request
httplib_response = conn.getresponse()
File "/usr/lib/python3.7/http/client.py", line 1344, in getresponse
response.begin()
File "/usr/lib/python3.7/http/client.py", line 306, in begin
version, status, reason = self._read_status()
File "/usr/lib/python3.7/http/client.py", line 275, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/requests/adapters.py", line 449, in send
timeout=timeout
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py", line 727, in urlopen
method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
File "/usr/local/lib/python3.7/dist-packages/urllib3/util/retry.py", line 403, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/usr/local/lib/python3.7/dist-packages/urllib3/packages/six.py", line 734, in reraise
raise value.with_traceback(tb)
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py", line 677, in urlopen
chunked=chunked,
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py", line 426, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py", line 421, in _make_request
httplib_response = conn.getresponse()
File "/usr/lib/python3.7/http/client.py", line 1344, in getresponse
response.begin()
File "/usr/lib/python3.7/http/client.py", line 306, in begin
version, status, reason = self._read_status()
File "/usr/lib/python3.7/http/client.py", line 275, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.7/threading.py", line 926, in _bootstrap_inner
self.run()
File "/usr/local/lib/python3.7/dist-packages/tensorboardX/event_file_writer.py", line 219, in run
self._record_writer.flush()
File "/usr/local/lib/python3.7/dist-packages/tensorboardX/event_file_writer.py", line 69, in flush
self._py_recordio_writer.flush()
File "/usr/local/lib/python3.7/dist-packages/tensorboardX/record_writer.py", line 187, in flush
self._writer.flush()
File "/usr/local/lib/python3.7/dist-packages/tensorboardX/record_writer.py", line 149, in flush
self.blob.upload_from_string(upload_buffer.getvalue())
File "/usr/local/lib/python3.7/dist-packages/google/cloud/storage/blob.py", line 1733, in upload_from_string
if_metageneration_not_match=if_metageneration_not_match,
File "/usr/local/lib/python3.7/dist-packages/google/cloud/storage/blob.py", line 1567, in upload_from_file
if_metageneration_not_match,
File "/usr/local/lib/python3.7/dist-packages/google/cloud/storage/blob.py", line 1420, in _do_upload
if_metageneration_not_match,
File "/usr/local/lib/python3.7/dist-packages/google/cloud/storage/blob.py", line 1098, in _do_multipart_upload
response = upload.transmit(transport, data, object_metadata, content_type)
File "/usr/local/lib/python3.7/dist-packages/google/resumable_media/requests/upload.py", line 106, in transmit
retry_strategy=self._retry_strategy,
File "/usr/local/lib/python3.7/dist-packages/google/resumable_media/requests/_helpers.py", line 136, in http_request
return _helpers.wait_and_retry(func, RequestsMixin._get_status_code, retry_strategy)
File "/usr/local/lib/python3.7/dist-packages/google/resumable_media/_helpers.py", line 150, in wait_and_retry
response = func()
File "/usr/local/lib/python3.7/dist-packages/google/auth/transport/requests.py", line 470, in request
**kwargs
File "/usr/local/lib/python3.7/dist-packages/requests/sessions.py", line 530, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python3.7/dist-packages/requests/sessions.py", line 643, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/requests/adapters.py", line 498, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Environment:
OS + version: [e.g. Ubuntu 20.04.1 LTS]
datasetinsights version
Environment: (which example environment you used to reproduce the error)
Other environment settings
The text was updated successfully, but these errors were encountered:
Describe the Bug:
While training on 10k images generated on Unity Simulation, the process stops midway through the run.
How to Reproduce?
Launch a training job with 10k images for 25 epochs
Using the container gcr.io/unity-ai-thea-test/custom-datasetinsights:v2
What did you expect to happen:
The training job to finish.
Screenshots
Traceback:
Environment:
The text was updated successfully, but these errors were encountered: