Skip to content

Commit

Permalink
lint
Browse files Browse the repository at this point in the history
  • Loading branch information
awdeorio committed Nov 3, 2023
1 parent 60a08b0 commit 5481aa4
Showing 1 changed file with 21 additions and 16 deletions.
37 changes: 21 additions & 16 deletions madoop/mapreduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,24 +112,9 @@ def prepare_input_files(input_path, output_dir):
because our use case has smaller inputs we use 1.
"""
# Build a list of input files. If input_path is a file, then use it. If
# input_path is a directory, then grab all the *files* inside.
input_paths = []
if input_path.is_dir():
for path in sorted(input_path.glob('*')):
if path.is_file():
input_paths.append(path)
else:
LOGGER.warning("Ignoring non-file: %s", path)
elif input_path.is_file():
input_paths.append(input_path)
assert input_paths, f"No input: {input_path}"


# Split and copy input files
part_num = 0
total_size = 0
for inpath in input_paths:
for inpath in normalize_input_paths(input_path):
assert inpath.is_file()

# Compute output filenames
Expand Down Expand Up @@ -161,6 +146,26 @@ def prepare_input_files(input_path, output_dir):
LOGGER.debug("total input size=%sB", total_size)


def normalize_input_paths(input_path):
    """Return a list of filtered input files.

    If input_path is a file, the result contains only that file.  If
    input_path is a directory, the result contains the files directly inside
    it, in sorted order.  Directory entries that are not files (for example
    subdirectories) are skipped and a warning is logged for each one.

    input_path must provide is_dir(), is_file() and glob(); presumably it is a
    pathlib.Path -- confirm against the callers.

    Raises AssertionError if no input file is found, e.g. the path does not
    exist or the directory contains no files.
    """
    input_paths = []
    if input_path.is_dir():
        # glob('*') only matches direct children, so nested files are never
        # picked up.  Sorting keeps the order stable between runs.
        for path in sorted(input_path.glob('*')):
            if path.is_file():
                input_paths.append(path)
            else:
                LOGGER.warning("Ignoring non-file: %s", path)
    elif input_path.is_file():
        input_paths.append(input_path)

    # Raise explicitly rather than with an assert statement: assert statements
    # are removed when Python runs with -O, which would let an empty input
    # list slip through silently.  The exception type and message are the
    # same as before.
    if not input_paths:
        raise AssertionError(f"No input: {input_path}")
    return input_paths


def is_executable(exe):
"""Verify exe is executable and raise exception if it is not.
Expand Down

0 comments on commit 5481aa4

Please sign in to comment.