-
Notifications
You must be signed in to change notification settings - Fork 48
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Initial uniq * Added uniq tests * Fixed documentation on read_line_until() * Replace CustomBufferedReader with new vlib version
- Loading branch information
Showing
4 changed files
with
265 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import common | ||
import os | ||
|
||
// Settings is the parsed command-line configuration for one run of uniq.
// args() fills it in; the integer options use -1 to mean "option not given".
struct Settings {
mut:
	// Output selection.
	count    bool // prefix each printed record with its occurrence count
	repeated bool // print only groups seen more than once
	unique   bool // print only groups seen exactly once
	// Comparison tuning.
	case_insensitive bool // compare lower-cased keys
	check_chars      int  // compare at most this many characters of the key
	help             bool // not assigned or read by the code in this file
	version          bool // not assigned or read by the code in this file
	skip_fields      int  // number of leading fields left out of the key
	skip_chars       int  // number of leading characters left out of the key
	// Record framing and I/O targets.
	line_delimiter u8     // record separator for reading and writing
	input_file     string // '-' selects standard input
	output_file    string // '-' selects standard output
}
|
||
// args parses os.args into a Settings value.
//
// Flags are registered in the same order they should appear in the usage
// text. At most two positional operands are accepted: the input path and
// then the output path; each defaults to '-' when omitted. More than two
// operands terminates the program through fail().
fn args() Settings {
	mut parser := common.flag_parser(os.args)
	parser.application(app_name)
	parser.description(app_description)
	parser.footer("\nA field is a run of blanks (usually spaces and/or TABs), then non-blank\ncharacters. Fields are skipped before chars.\n\nNote: 'uniq' does not detect repeated lines unless they are adjacent.\nYou may want to sort the input first, or use 'sort -u' without 'uniq'.")

	count := parser.bool('count', `c`, false, 'prefix lines by the number of occurrences')
	repeated := parser.bool('repeated', `d`, false, 'only print duplicate lines, one for each group')
	unique := parser.bool('unique', `u`, false, 'only print unique lines')
	case_insensitive := parser.bool('ignore-case', `i`, false, 'ignore differences in case when comparing')
	check_chars := parser.int('check-chars', `w`, -1, 'compare no more than N characters in lines')
	skip_fields := parser.int('skip-fields', `f`, -1, 'avoid comparing the first N fields')
	skip_chars := parser.int('skip-chars', `s`, -1, 'avoid comparing the first N characters')
	zero_terminated := parser.bool('zero-terminated', `z`, false, 'line delimiter is NUL, not newline')
	delimiter := if zero_terminated { u8(`\0`) } else { u8(`\n`) }

	// Validation: fail() exits, so execution only continues with 0..2 operands.
	operands := parser.remaining_parameters()
	if operands.len > 2 {
		fail('Too many arguments specified')
	}

	return Settings{
		count:            count
		repeated:         repeated
		unique:           unique
		case_insensitive: case_insensitive
		check_chars:      check_chars
		skip_fields:      skip_fields
		skip_chars:       skip_chars
		line_delimiter:   delimiter
		input_file:       if operands.len > 0 { operands[0] } else { '-' }
		output_file:      if operands.len == 2 { operands[1] } else { '-' }
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
module main | ||
|
||
import io | ||
import math | ||
import os | ||
|
||
// Identity of this utility: the name is used as the prefix of diagnostics
// and, together with the description, is handed to the flag parser.
// POSIX Spec: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/uniq.html
const app_name = 'uniq'
const app_description = 'report or omit repeated lines'
|
||
// Buffer tracks the group of adjacent equal records currently being read.
struct Buffer {
mut:
	// First record of the current group; this is the text that gets printed.
	seen string
	// Number of records in the current group; 0 until a record has been read.
	count int
}
|
||
// fail prints `message` to stderr, prefixed with the utility name, and then
// terminates the process with exit status 1. It never returns.
@[noreturn]
fn fail(message string) {
	eprintln(app_name + ': ' + message)
	exit(1)
}
|
||
// output_line writes the finished group `s` to `outfile` if the settings
// select it.
//
// A group is selected when neither `unique` nor `repeated` is set, or when
// `unique` is set and the group has exactly one record, or when `repeated`
// is set and the group has more than one. With `count` set, the record is
// preceded by the group size formatted as `${count:7} `. The record is
// followed by the configured line delimiter.
//
// Always returns true; write errors are propagated to the caller.
fn output_line(s Buffer, settings Settings, mut outfile os.File) !bool {
	// An empty group means nothing has been read yet.
	if s.count <= 0 {
		return true
	}

	filtering := settings.unique || settings.repeated
	selected := if filtering {
		(settings.unique && s.count == 1) || (settings.repeated && s.count > 1)
	} else {
		true
	}
	if !selected {
		return true
	}

	if settings.count {
		outfile.write('${s.count:7} '.bytes())!
	}
	outfile.write(s.seen.bytes())!
	outfile.write(rune(settings.line_delimiter).bytes())!
	return true
}
|
||
// get_start_of_field returns the byte index at which the `target_field`-th
// whitespace-separated field of `source` begins (fields are numbered from 1).
// To skip n fields, ask for field n+1.
//
// If `source` has fewer than `target_field` fields (or `target_field` is
// less than 1), `source.len` is returned, so slicing from the result yields
// an empty string.
//
// NOTE(review): the returned index points at the first non-blank byte of the
// field, so blanks in front of it are not part of what follows. POSIX
// describes a field as blanks followed by non-blanks — confirm this is the
// intended comparison key.
fn get_start_of_field(source string, target_field int) int {
	mut fields_seen := 0
	// Start as if a blank preceded the string so a leading non-blank opens field 1.
	mut prev_blank := true
	for idx, ch in source {
		blank := ch.is_space()
		if prev_blank && !blank {
			// Transition blank -> non-blank: a new field starts here.
			fields_seen++
			if fields_seen == target_field {
				return idx
			}
		}
		prev_blank = blank
	}
	return source.len
}
|
||
// compare reports whether `source` and `target` belong to the same group
// under `settings`.
//
// The comparison key of each record is derived in this order, every step
// working on the result of the previous one:
//   1. skip_fields > -1: drop everything before field skip_fields + 1;
//   2. skip_chars  > -1: drop up to skip_chars further leading bytes;
//   3. check_chars > -1: keep at most check_chars leading bytes.
// The keys are then compared, lower-cased first when case_insensitive is set.
//
// Offsets are byte offsets into the V strings, not Unicode characters.
fn compare(source string, target string, settings Settings) bool {
	mut s1 := source
	mut s2 := target

	if settings.skip_fields > -1 {
		s1 = s1[get_start_of_field(s1, settings.skip_fields + 1)..]
		s2 = s2[get_start_of_field(s2, settings.skip_fields + 1)..]
	}

	// Slice the already-trimmed keys (not the raw records) so that field
	// skipping is preserved when combined with character skipping.
	if settings.skip_chars > -1 {
		s1 = s1[math.min(s1.len, settings.skip_chars)..]
		s2 = s2[math.min(s2.len, settings.skip_chars)..]
	}

	// Likewise, the length limit applies to what is left after the skips.
	if settings.check_chars > -1 {
		s1 = s1[..math.min(s1.len, settings.check_chars)]
		s2 = s2[..math.min(s2.len, settings.check_chars)]
	}

	if settings.case_insensitive {
		return s1.to_lower() == s2.to_lower()
	}
	return s1 == s2
}
|
||
// uniq reads delimiter-separated records from the configured input,
// collapses runs of adjacent records that compare() considers equal, and
// hands each finished run to output_line() for the configured output.
//
// An input or output path of '-' selects stdin or stdout respectively. A
// path that cannot be opened or created terminates the program via fail().
// A write error from output_line() panics.
fn uniq(settings Settings) {
	mut file := os.File{}
	mut outfile := os.File{}
	if settings.input_file == '-' {
		file = os.stdin()
	} else {
		file = os.open(settings.input_file) or {
			fail('${settings.input_file}: No such file or directory')
		}
	}
	defer {
		file.close()
	}

	if settings.output_file == '-' {
		outfile = os.stdout()
	} else {
		outfile = os.create(settings.output_file) or {
			fail('${settings.output_file}: No such file or directory')
		}
	}
	defer {
		outfile.close()
	}

	mut br := io.new_buffered_reader(io.BufferedReaderConfig{ reader: file })
	defer {
		br.free()
	}

	mut s := Buffer{
		seen:  ''
		count: 0
	}
	for {
		line := br.read_line(delim: settings.line_delimiter) or { break }
		// The very first record always opens a group. Comparing it with the
		// empty placeholder in `seen` would wrongly merge it whenever its
		// comparison key is empty (e.g. after skipping fields or characters).
		if s.count == 0 || !compare(line, s.seen, settings) {
			// output_line ignores the still-empty group on the first record.
			output_line(s, settings, mut &outfile) or { panic(err) }
			s.seen = line
			s.count = 1
		} else {
			s.count += 1
		}
	}
	// Flush the last group, which no following record has terminated.
	output_line(s, settings, mut &outfile) or { panic(err) }
}
|
||
// main parses the command line and runs uniq with the resulting settings.
fn main() {
	settings := args()
	uniq(settings)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
import common.testing | ||
import os | ||
|
||
// Name of the utility under test.
const util = 'uniq'

// Command used to invoke the reference implementation: on Windows it is
// reached through the `coreutils` multi-call command, elsewhere it is called
// by its own name.
const platform_util = $if windows {
	'coreutils ${util}'
} $else {
	util
}
|
||
// Executable built from this repository for `util`, as prepared by the
// shared testing helper.
const executable_under_test = testing.prepare_executable(util)

// Pairs the platform's reference command with the executable under test;
// the tests below use it to check that both give the same results.
const cmd = testing.new_paired_command(platform_util, executable_under_test)

// Temporary folder provided by the testing helper.
const temp_dir = testing.temp_folder
|
||
// Fixture records shared by all tests. Ignoring the leading `#NN` field,
// records 1, 3, 5 and 6 are identical, and only 5 and 6 are adjacent; record
// 4 consists of a single field.
const posix_test_data = [
	'#01 foo0 bar0 foo1 bar1',
	'#02 bar0 foo1 bar1 foo1',
	'#03 foo0 bar0 foo1 bar1',
	'#04',
	'#05 foo0 bar0 foo1 bar1',
	'#06 foo0 bar0 foo1 bar1',
	'#07 bar0 foo1 bar1 foo0',
]

// Fixture file holding the records joined by newlines.
const posix_test_path_newline = 'posix_nl.txt'

// Fixture file holding the records joined by NUL bytes.
const posix_test_path_zeroterm = 'posix_zt.txt'
|
||
// call_for_test runs the executable under test with `args`, asserts that it
// exited with status 0, and returns the captured result.
fn call_for_test(args string) os.Result {
	command_line := '${executable_under_test} ${args}'
	result := os.execute(command_line)
	assert result.exit_code == 0
	return result
}
|
||
// Counting groups while ignoring the first field must match the reference.
fn test_posix_spec_case_1() {
	matches := cmd.same_results('-c -f 1 posix_nl.txt')
	assert matches
}
|
||
// Printing only repeated groups, ignoring the first field, must match the
// reference.
fn test_posix_spec_case_2() {
	matches := cmd.same_results('-d -f 1 posix_nl.txt')
	assert matches
}
|
||
// Printing only non-repeated groups, ignoring the first field, must match
// the reference.
fn test_posix_spec_case_3() {
	matches := cmd.same_results('-u -f 1 posix_nl.txt')
	assert matches
}
|
||
// Printing only repeated groups, ignoring the first two characters, must
// match the reference.
fn test_posix_spec_case_4() {
	matches := cmd.same_results('-d -s 2 posix_nl.txt')
	assert matches
}
|
||
// NUL-delimited variant of case 1: counting groups while ignoring the first
// field. The fixture forms six groups (only records 5 and 6 merge), each
// written with a trailing NUL, so splitting on NUL gives seven pieces
// (six records plus the empty tail).
//
// `-z` is required here, as in the other zero-term tests; without it the
// whole fixture is read as one newline-delimited record and the same piece
// count is reached only by coincidence.
fn test_posix_spec_case_1_zero_term() {
	records := call_for_test('-c -f 1 -z posix_zt.txt').output.split('\0')
	assert records.len == 7
}
|
||
// NUL-delimited variant of case 2: splitting the output on NUL must give
// two pieces.
fn test_posix_spec_case_2_zero_term() {
	records := call_for_test('-d -f 1 -z posix_zt.txt').output.split('\0')
	assert records.len == 2
}
|
||
// NUL-delimited variant of case 3: splitting the output on NUL must give
// six pieces.
fn test_posix_spec_case_3_zero_term() {
	records := call_for_test('-u -f 1 -z posix_zt.txt').output.split('\0')
	assert records.len == 6
}
|
||
// NUL-delimited variant of case 4: splitting the output on NUL must give a
// single piece.
fn test_posix_spec_case_4_zero_term() {
	records := call_for_test('-d -s 2 -z posix_zt.txt').output.split('\0')
	assert records.len == 1
}
|
||
// testsuite_begin writes the two fixture files used by the tests: the shared
// records joined by newlines and the same records joined by NUL bytes.
// Neither file ends with a trailing delimiter.
fn testsuite_begin() {
	newline_fixture := posix_test_data.join('\n')
	os.write_file(posix_test_path_newline, newline_fixture)!

	nul_fixture := posix_test_data.join('\0')
	os.write_file(posix_test_path_zeroterm, nul_fixture)!
}
|
||
// testsuite_end removes the fixture files created by testsuite_begin,
// stopping at the first removal that fails.
fn testsuite_end() {
	for fixture in [posix_test_path_newline, posix_test_path_zeroterm] {
		os.rm(fixture)!
	}
}
|
||
// Delegates to the paired-command helper to check the help and version
// options; any error it reports is propagated.
fn test_help_and_version() {
	cmd.ensure_help_and_version_options_work()!
}