Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gzip stream #7

Draft
wants to merge 49 commits into
base: development
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
bbc1387
?
omesser Mar 1, 2021
0ba55cd
M
omesser Mar 2, 2021
9e3271c
m
omesser Mar 2, 2021
f00c096
manifest_layer_info list
omesser Mar 2, 2021
e469174
Not gzipping
omesser Mar 2, 2021
72c6506
No
omesser Mar 2, 2021
80ea0ae
compress
omesser Mar 2, 2021
01142fc
shlex
omesser Mar 2, 2021
8199eff
try
omesser Mar 2, 2021
c49458f
headers
omesser Mar 2, 2021
2ea506c
reorder
omesser Mar 2, 2021
802f0dc
log
omesser Mar 2, 2021
9462df9
nicer
omesser Mar 2, 2021
3224407
Moving up
omesser Mar 2, 2021
91b1aa3
M
omesser Mar 2, 2021
6fff684
Handle some cases nicely
omesser Mar 2, 2021
d655e3f
bug fix layer_info access
omesser Mar 2, 2021
497adbf
force
omesser Mar 2, 2021
91c328c
log message change
omesser Mar 2, 2021
61da504
handle symlinkx
omesser Mar 2, 2021
2d06530
Add log, unlink old symlink
omesser Mar 3, 2021
cc7800b
another log
omesser Mar 3, 2021
eb96741
No skipping
omesser Mar 3, 2021
c0a9ec4
log change
omesser Mar 3, 2021
de4e3b5
fmt
omesser Mar 3, 2021
0d75b91
Do all before
omesser Mar 3, 2021
f452bff
Write manifest
omesser Mar 3, 2021
8050a6e
Log harder
omesser Mar 3, 2021
dc34284
M
omesser Mar 3, 2021
5982f8d
M
omesser Mar 3, 2021
a18f8f6
Resolved existing layer skip issue
omesser Mar 3, 2021
c0956d9
compress
omesser Mar 3, 2021
f39ee3b
M
omesser Mar 3, 2021
21611b7
fmting
omesser Mar 3, 2021
0b733f8
M
omesser Mar 4, 2021
2d369da
M
omesser Mar 4, 2021
86564dd
Formatting
omesser Mar 4, 2021
9b1aa08
Leave the discrepency
omesser Mar 4, 2021
e9eec6c
symlinks first
omesser Mar 4, 2021
ad4e7c2
strings
omesser Mar 4, 2021
6b5bf5e
to strings
omesser Mar 4, 2021
7f6cecd
symlink bug
omesser Mar 4, 2021
b61a0aa
M
omesser Mar 5, 2021
86ca822
remove verification and parallelize gzip
omesser Mar 5, 2021
d380023
M
omesser Mar 6, 2021
566813f
Cleaning up
omesser Mar 6, 2021
0452de3
Removing old logic
omesser Mar 7, 2021
1f2fe26
More cleanup for manifest creator
omesser Mar 7, 2021
305c13b
Cleaning old style class defs and unused imports
omesser Mar 7, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ ENV/
# pycharm proj
.idea

# stray tar.gz files used for manual testing
# stray archive files used for manual testing
*.tar
*.tar.gz
2 changes: 1 addition & 1 deletion core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .manifest_creator import ImageManifestCreator
from .image_manifest_creator import ImageManifestCreator
from .registry import Registry
from .extractor import Extractor
from .processor import Processor
2 changes: 1 addition & 1 deletion core/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import humanfriendly


class Extractor(object):
class Extractor:
def __init__(self, logger, archive_path):
self._logger = logger.get_child('tar')
self._archive_path = os.path.abspath(archive_path)
Expand Down
28 changes: 28 additions & 0 deletions core/image_manifest_creator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import json


class ImageManifestCreator:
def __init__(self, config_path, layers_info, config_info):
self._config_path = config_path
self._layers_info = layers_info
self._config_info = config_info

def create(self):
manifest = dict()
manifest["schemaVersion"] = 2
manifest["mediaType"] = "application/vnd.docker.distribution.manifest.v2+json"
manifest["config"] = {
"mediaType": "application/vnd.docker.container.image.v1+json",
"size": self._config_info['size'],
"digest": self._config_info['digest'],
}
manifest["layers"] = []
for layer_info in self._layers_info:
layer_data = {
"mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip",
"size": layer_info['size'],
"digest": layer_info['digest'],
}
manifest["layers"].append(layer_data)

return json.dumps(manifest)
43 changes: 0 additions & 43 deletions core/manifest_creator.py

This file was deleted.

37 changes: 29 additions & 8 deletions core/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import multiprocessing.pool
import time
import os.path
import shutil
import json

import humanfriendly
Expand All @@ -10,14 +11,17 @@
from . import extractor


class Processor(object):
class Processor:
def __init__(
self,
logger,
tmp_dir,
tmp_dir_override,
parallel,
registry_url,
archive_path,
stream=False,
gzip_layers=False,
login=None,
password=None,
ssl_verify=True,
Expand All @@ -26,6 +30,8 @@ def __init__(
):
self._logger = logger
self._parallel = parallel
self._tmp_dir = tmp_dir
self._tmp_dir_override = tmp_dir_override

if parallel > 1 and stream:
self._logger.info(
Expand All @@ -36,6 +42,7 @@ def __init__(

self._registry = registry.Registry(
logger=self._logger,
gzip_layers=gzip_layers,
registry_url=registry_url,
stream=stream,
login=login,
Expand All @@ -55,8 +62,14 @@ def process(self):
"""
start_time = time.time()
results = []
with tempfile.TemporaryDirectory() as tmp_dir_name:

if self._tmp_dir_override:
tmp_dir_name = self._tmp_dir_override
os.mkdir(tmp_dir_name, 0o700)
else:
tmp_dir_name = tempfile.mkdtemp(dir=self._tmp_dir)

# since we're not always using TemporaryDirectory we're making and cleaning up ourselves
try:
self._logger.info(
'Processing archive',
archive_path=self._extractor.archive_path,
Expand All @@ -83,9 +96,12 @@ def process(self):
pool.close()
pool.join()

# this will throw if any pool worker caught an exception
for res in results:
res.get()
# this will throw if any pool worker caught an exception
for res in results:
res.get()
finally:
shutil.rmtree(tmp_dir_name)
self._logger.verbose('Removed workdir', tmp_dir_name=tmp_dir_name)

elapsed = time.time() - start_time
self._logger.info(
Expand All @@ -95,11 +111,16 @@ def process(self):
)

@staticmethod
def _get_manifest(tmp_dir_name):
with open(os.path.join(tmp_dir_name, 'manifest.json'), 'r') as fh:
def _get_manifest(archive_dir):
with open(os.path.join(archive_dir, 'manifest.json'), 'r') as fh:
manifest = json.loads(fh.read())
return manifest

@staticmethod
def _write_manifest(archive_dir, contents):
with open(os.path.join(archive_dir, 'manifest.json'), 'w') as fh:
json.dump(contents, fh)


#
# Global wrappers to use with multiprocessing.pool.Pool which can't pickle instance methods
Expand Down
Loading