Skip to content

Commit

Permalink
feat: implement sum (#121)
Browse files Browse the repository at this point in the history
* feat: implement sum

feat: implement buffered read for BSD algorithm

feat: implement buffered read for SysV algorithm

ref: extract block count fns

chore: clean up todo

feat: add tests

ref: split printing and data collect

feat: add sysv quirk tests

feat: add rjust fn

test: add bsd main behavior tests

feat: add zero padding to BSD sum, add tests

feat: wide block count col

* cleanup: remove unused import

* chore: apply v fmt

* chore: apply v fmt to wc/

* fix: whitespace output

* fix: apply vfmt

* fix: prod build

* fix: vfmt, again

* Update sum_test.v

Can't use echo in Windows as it produces different outputs; example: `echo hhh | od -t x1`

Linux
0000000 68 68 68 0a
0000004

Windows
0000000 68 68 68 20 0d 0a
0000006

* v fmt

* Update sum_test.v

Avoid `echo` because it behaves differently in Windows

* Account for carriage returns

Windows emits new lines as \r\n but the reference result was calculated on *nix where it was only \n

* Account for carriage returns with proper escaping

---------

Co-authored-by: syrmel <[email protected]>
Co-authored-by: JalonSolov <[email protected]>
  • Loading branch information
3 people authored Nov 30, 2024
1 parent e975b67 commit 13a874e
Show file tree
Hide file tree
Showing 4 changed files with 279 additions and 29 deletions.
25 changes: 19 additions & 6 deletions src/sum/parse_args.v
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,32 @@ import common
import os
import time

// app_name is the application name handed to the flag parser.
const app_name = 'sum'
// app_description is the help text handed to the flag parser's description.
const app_description = '
Print checksum and block counts for each FILE.
With no FILE, or when FILE is -, read standard input.'

// Args holds the options produced by parse_args.
struct Args {
// sys_v is true when the System V algorithm was selected.
sys_v bool
// files lists the inputs to checksum.
files []string
}

fn parse_args(args []string) Args {
mut fp := common.flag_parser(args)
fp.application(app_name)
fp.description('
Print or check BSD (16-bit) checksums.
fp.description(app_description)

With no FILE, or when FILE is -, read standard input.'.trim_indent())

fp.bool('', `r`, true, 'use BSD sum algorithm (the default), use 1K blocks')
sys_v := fp.bool('sysv', `s`, false, 'use System V sum algorithm, use 512 bytes blocks')
fp.bool('', `r`, true, 'use BSD sum algorithm, use 1K blocks')
mut sys_v := fp.bool('sysv', `s`, false, 'use System V sum algorithm, use 512 bytes blocks')
files_arg := fp.finalize() or { exit_error(err.msg()) }
files := scan_files_arg(files_arg)

// emulate original algorithm switches behavior
if '-rs' in args {
sys_v = true
}

return Args{
sys_v: sys_v
files: files
Expand Down
82 changes: 65 additions & 17 deletions src/sum/sum.v
Original file line number Diff line number Diff line change
@@ -1,33 +1,85 @@
import os

const app_name = 'sum'

struct Args {
sys_v bool
files []string
struct Sum {
checksum u16
block_count u64
mut:
file_name string
}

// Block size, in bytes, used to count blocks for the BSD algorithm.
const bsd_block_size = 1024
// Block size, in bytes, used to count blocks for the System V algorithm.
const sysv_block_size = 512

fn main() {
args := parse_args(os.args)
mut sums := []Sum{}
mut block_size := match args.sys_v {
true { sysv_block_size }
false { bsd_block_size }
}

for file in args.files {
println(sum(file, args.sys_v))
checksum, mut blocks, file_name := sum(file, args.sys_v)
blocks = get_file_block_count(file, block_size)
sums << Sum{checksum, blocks, file_name}
}

if args.sys_v {
print_sysv(sums)
} else {
print_bsd(mut sums)
}
}

// get_file_block_count returns how many blocks of `block_size` bytes are
// needed to hold `file`; a trailing partial block counts as a whole one.
fn get_file_block_count(file string, block_size int) u64 {
	total_bytes := os.file_size(file)
	unit := u64(block_size)
	full_blocks := total_bytes / unit
	has_partial_block := total_bytes % unit != 0
	return if has_partial_block { full_blocks + 1 } else { full_blocks }
}

// print_sysv writes one line per entry: checksum, block count and file name,
// with no column padding. Surrounding whitespace is trimmed from each line.
fn print_sysv(sums []Sum) {
	for entry in sums {
		line := '${entry.checksum} ${entry.block_count}${entry.file_name}'
		println(line.trim_space())
	}
}

// print_bsd writes one line per entry with a zero-padded 5-digit checksum and
// a block count right-justified to 5 columns. Block counts wider than
// 5 characters are printed as-is. When there is exactly one entry its file
// name is blanked before printing.
fn print_bsd(mut sums []Sum) {
	if sums.len == 1 {
		sums[0].file_name = ''
	}
	for entry in sums {
		digits := entry.block_count.str()
		block_col := if digits.len <= 5 { rjust(digits, 5) } else { digits }
		line := '${entry.checksum:05} ${block_col}${entry.file_name}'
		println(line.trim_space())
	}
}

// rjust right-justifies `s` in a field of `width` characters by padding it
// with spaces on the left. When `s` is already `width` characters or longer
// (including `width <= 0`) it is returned unchanged.
fn rjust(s string, width int) string {
	// Nothing to pad once `s` fills the column. This guard also keeps a
	// negative count from reaching `string.repeat`, which panics on one.
	if s.len >= width {
		return s
	}
	return ' '.repeat(width - s.len) + s
}

fn sum(file string, sys_v bool) string {
fn sum(file string, sys_v bool) (u16, u64, string) {
digest, blocks := match sys_v {
true { sum_sys_v(file) }
else { sum_bsd(file) }
}

name := if file.contains('/sum-') { '' } else { file }
return '${digest:5} ${blocks:5} ${name}'
name := if file.contains('/sum-') { '' } else { ' ${file}' }
return digest, blocks, name
}

fn sum_bsd(file string) (u16, int) {
mut count := 0
fn sum_bsd(file string) (u16, u64) {
mut checksum := u16(0)
mut blocks := u64(0)
mut f := os.open(file) or { exit_error(err.msg()) }
defer { f.close() }

Expand All @@ -36,27 +88,23 @@ fn sum_bsd(file string) (u16, int) {
checksum = (checksum >> 1) + ((checksum & 1) << 15)
checksum += c
checksum &= 0xffff
count += 1
}

blocks := count / 1024 + 1
return checksum, blocks
}

fn sum_sys_v(file string) (u16, int) {
fn sum_sys_v(file string) (u16, u64) {
mut sum := u32(0)
mut count := u32(0)
mut blocks := u64(0)
mut f := os.open(file) or { exit_error(err.msg()) }
defer { f.close() }

for {
c := f.read_raw[u8]() or { break }
sum += c
count += 1
}

r := (sum & 0xffff) + ((sum & 0xffffffff) >> 16)
checksum := u16((r & 0xffff) + (r >> 16))
blocks := count / 512 + 1
return checksum, blocks
}
199 changes: 194 additions & 5 deletions src/sum/sum_test.v
Original file line number Diff line number Diff line change
@@ -1,15 +1,204 @@
module main

import os
import io.util
import common.testing

// Line terminator appended to every expected output in this file.
const eol = testing.output_eol()
const file_sep = os.path_separator

// Name of the utility exercised by this test file.
const util = 'sum'

// Platform command name: the bare utility name everywhere except Windows,
// where it is prefixed with `coreutils`.
const platform_util = $if !windows {
util
} $else {
'coreutils ${util}'
}

// Value returned by the shared test helper for `util`; the tests below use it
// as the command to run.
const executable_under_test = testing.prepare_executable(util)

// Pairs the platform command with the executable under test.
const cmd = testing.new_paired_command(platform_util, executable_under_test)

// Fixture paths inside the shared temp folder; the files are created in
// testsuite_begin and removed in testsuite_end.
const test1_txt = os.join_path(testing.temp_folder, 'test1.txt')
const test2_txt = os.join_path(testing.temp_folder, 'test2.txt')
const test3_txt = os.join_path(testing.temp_folder, 'test3.txt')
const long_line = os.join_path(testing.temp_folder, 'long_line')
const large_file = os.join_path(testing.temp_folder, 'large_file')
const main_txt = os.join_path(testing.temp_folder, 'test.txt')

// Delegates to the paired command's shared check of the help and version
// options; any error it returns fails the test.
fn test_help_and_version() {
cmd.ensure_help_and_version_options_work()!
}

// testsuite_begin switches into this test file's directory and creates every
// fixture the tests read: three small text files, a 151 KiB single-line file,
// a 110 MiB file, and a copy of the checked-in `test.txt` sample.
fn testsuite_begin() {
	test_dir := os.dir(@FILE)
	os.chdir(test_dir)!
	os.write_file(test1_txt, 'Hello World!\nHow are you?')!
	os.write_file(test2_txt, '0123456789abcdefghijklmnopqrstuvwxyz')!
	os.write_file(test3_txt, 'dummy')!
	os.write_file(long_line, 'z'.repeat(1024 * 151))!
	os.write_file(large_file, 'z'.repeat(110 * 1024 * 1024))!

	// Locate the sample next to this file via path functions. `trim_right`
	// takes a set of characters to strip, not a suffix, so it must not be used
	// to cut the file name off the path.
	sample_file_name := os.join_path(test_dir, 'test.txt')
	os.cp(sample_file_name, main_txt)!
}

// testsuite_end removes every fixture file, in the order they are listed.
fn testsuite_end() {
	fixtures := [test1_txt, test2_txt, test3_txt, long_line, large_file, main_txt]
	for fixture in fixtures {
		os.rm(fixture)!
	}
}

/*
tests from main branch for completeness
*/
fn test_bsd() {
assert sum('test.txt', false) == '38039 1 test.txt'
res := os.execute('cat ${main_txt} | ${executable_under_test} -r')

assert res.exit_code == 0
assert res.output == '38039 1${eol}'
}

fn test_sysv() {
assert sum('test.txt', true) == '25426 1 test.txt'
res := os.execute('cat ${main_txt} | ${executable_under_test} -s')

assert res.exit_code == 0
assert res.output == '25426 1${eol}'
}

/*
test main SysV switch behavior
*/
// Pipes the fixture into the utility with `-s`; no file name is expected in
// the output.
fn test_sysv_stream_succeeds() {
	command := 'cat ${test1_txt} | ${executable_under_test} -s'
	result := os.execute(command)

	expected := '2185 1${eol}'
	assert result.exit_code == 0
	assert result.output == expected
}

// Passes a single file operand with `-s`; the file name is expected after the
// checksum and block count.
fn test_sysv_one_file_succeeds() {
	command := '${executable_under_test} -s ${test1_txt}'
	result := os.execute(command)

	expected := '2185 1 ${test1_txt}${eol}'
	assert result.exit_code == 0
	assert result.output == expected
}

// Passes the same file three times with `-s`; one output line is expected per
// operand, i.e. duplicates are not collapsed.
fn test_sysv_repeated_files_not_get_filtered() {
	command := '${executable_under_test} -s ${test1_txt} ${test1_txt} ${test1_txt}'
	result := os.execute(command)

	line := '2185 1 ${test1_txt}${eol}'
	assert result.exit_code == 0
	assert result.output == line.repeat(3)
}

// Passes three different files with `-s`; one line per file is expected, in
// operand order.
fn test_sysv_several_files_succeeds() {
	command := '${executable_under_test} -s ${test1_txt} ${test2_txt} ${test3_txt}'
	result := os.execute(command)

	expected := '2185 1 ${test1_txt}${eol}' + '3372 1 ${test2_txt}${eol}' +
		'556 1 ${test3_txt}${eol}'
	assert result.exit_code == 0
	assert result.output == expected
}

// sum_arbitrary_value writes `value` plus a trailing newline to a temp file,
// pipes that file into the utility with the switch `arg`, removes the temp
// file and returns the execution result.
fn sum_arbitrary_value(value string, arg string) !os.Result {
	mut tmp, path := util.temp_file()!
	tmp.write_string('${value}\n')!
	tmp.close()

	// On Windows the pipeline additionally strips carriage returns.
	mut pipeline := 'cat ${path} | ${executable_under_test} ${arg}'
	$if windows {
		pipeline = "cat ${path} | tr -d '\\r' | ${executable_under_test} ${arg}"
	}
	outcome := os.execute(pipeline)

	os.rm(path)!
	return outcome
}

/*
test SysV output quirks
*/
// A lone newline yields a two-digit SysV checksum, expected without padding.
fn test_sysv_width_2_col_no_padding() {
	result := sum_arbitrary_value('', '-s')!

	expected := '10 1${eol}'
	assert result.exit_code == 0
	assert result.output == expected
}

// One byte plus a newline yields a three-digit SysV checksum, expected without
// padding.
fn test_sysv_width_3_col_no_padding() {
	result := sum_arbitrary_value('\x61', '-s')!

	expected := '107 1${eol}'
	assert result.exit_code == 0
	assert result.output == expected
}

// Nine `z` bytes plus a newline yield a four-digit SysV checksum, expected
// without padding.
fn test_sysv_width_4_col_no_padding() {
	result := sum_arbitrary_value('zzzzzzzzz', '-s')!

	expected := '1108 1${eol}'
	assert result.exit_code == 0
	assert result.output == expected
}

// Mixes a file with wide columns and files with narrow ones under `-s`; the
// expected lines are not aligned with each other.
fn test_sysv_different_col_widths_no_alignment() {
	command := '${executable_under_test} -s ${long_line} ${test1_txt} ${test2_txt} ${test3_txt}'
	result := os.execute(command)

	expected := '55583 302 ${long_line}${eol}' + '2185 1 ${test1_txt}${eol}' +
		'3372 1 ${test2_txt}${eol}' + '556 1 ${test3_txt}${eol}'
	assert result.exit_code == 0
	assert result.output == expected
}

/*
test main BSD switch behavior
*/
// Pipes the fixture into the utility with `-r`; no file name is expected in
// the output.
fn test_bsd_sum_stream_succeeds() {
	command := 'cat ${test1_txt} | ${executable_under_test} -r'
	result := os.execute(command)

	expected := '59852 1${eol}'
	assert result.exit_code == 0
	assert result.output == expected
}

// Passes a single file operand with `-r`; the expected output carries no file
// name.
fn test_bsd_sum_one_file_succeeds() {
	command := '${executable_under_test} -r ${test1_txt}'
	result := os.execute(command)

	expected := '59852 1${eol}'
	assert result.exit_code == 0
	assert result.output == expected
}

// Passes the same file three times with `-r`; one output line is expected per
// operand, i.e. duplicates are not collapsed.
fn test_bsd_sum_repeated_files_not_get_filtered() {
	command := '${executable_under_test} -r ${test1_txt} ${test1_txt} ${test1_txt}'
	result := os.execute(command)

	line := '59852 1 ${test1_txt}${eol}'
	assert result.exit_code == 0
	assert result.output == line.repeat(3)
}

// Passes three different files with `-r`; one line per file is expected, in
// operand order.
fn test_bsd_sum_several_files_succeeds() {
	command := '${executable_under_test} -r ${test1_txt} ${test2_txt} ${test3_txt}'
	result := os.execute(command)

	expected := '59852 1 ${test1_txt}${eol}' + '11628 1 ${test2_txt}${eol}' +
		'41183 1 ${test3_txt}${eol}'
	assert result.exit_code == 0
	assert result.output == expected
}

/*
test BSD output quirks
*/
// A two-digit BSD checksum is expected zero-padded to five digits.
fn test_bsd_sum_col_width_2_padded_with_zero() {
	result := sum_arbitrary_value('\x02', '-r')!

	expected := '00011 1${eol}'
	assert result.exit_code == 0
	assert result.output == expected
}

// A three-digit BSD checksum is expected zero-padded to five digits.
fn test_bsd_sum_col_width_3_padded_with_zero() {
	result := sum_arbitrary_value('hhh', '-r')!

	expected := '00101 1${eol}'
	assert result.exit_code == 0
	assert result.output == expected
}

// Intended to check zero padding of a four-digit BSD checksum.
// NOTE(review): the input and expected output are identical to
// test_bsd_sum_col_width_3_padded_with_zero (checksum 101, three digits), so
// the four-digit case is not actually exercised. TODO: switch to an input
// whose BSD checksum falls in 1000..9999 and update the expected value.
fn test_bsd_sum_col_width_4_padded_with_zero() {
res := sum_arbitrary_value('hhh', '-r')!

assert res.exit_code == 0
assert res.output == '00101 1${eol}'
}

// A block count wider than five characters is expected to be printed as-is.
fn test_bsd_block_col_width_more_than_5_not_aligned() {
	// A six-digit block count needs 100+ MB of input; the block-count function
	// cannot easily be mocked, so a real file of that size is used.
	command := '${executable_under_test} -r ${test1_txt} ${large_file} ${test2_txt}'
	result := os.execute(command)

	expected := '59852 1 ${test1_txt}${eol}' + '62707 112640 ${large_file}${eol}' +
		'11628 1 ${test2_txt}${eol}'
	assert result.exit_code == 0
	assert result.output == expected
}
2 changes: 1 addition & 1 deletion src/wc/wc_test.v
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ const long_over_16k = os.join_path(rig.temp_dir, 'long_over_16k')
const long_under_16k = os.join_path(rig.temp_dir, 'long_under_16k')

// todo add tests
// - long line (>16k) count max line
// - test windows \r\n vs \n

fn testsuite_begin() {
rig.assert_platform_util()
Expand Down

0 comments on commit 13a874e

Please sign in to comment.