Skip to content

Commit

Permalink
Adding the uniq utility (#123)
Browse files Browse the repository at this point in the history
* Initial uniq

* Added uniq tests

* Fixed documentation on read_line_until()

* Replace CustomBufferedReader with new vlib version
  • Loading branch information
syrmel authored Jan 26, 2024
1 parent d29bba7 commit b26640e
Show file tree
Hide file tree
Showing 4 changed files with 265 additions and 0 deletions.
Empty file removed src/uniq/delete.me
Empty file.
55 changes: 55 additions & 0 deletions src/uniq/settings.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import common
import os

// Settings holds the command-line configuration for one run of uniq.
// It is filled in by args() and read by compare(), output_line() and uniq().
struct Settings {
mut:
// -c / --count: prefix each emitted line with its occurrence count.
count bool
// -d / --repeated: only emit groups that occurred more than once.
repeated bool
// -u / --unique: only emit groups that occurred exactly once.
unique bool
// -i / --ignore-case: compare lines after lower-casing both sides.
case_insensitive bool
// -w / --check-chars: maximum number of characters compared; -1 means no limit.
check_chars int
// NOTE(review): help and version are not assigned anywhere in the visible code.
help bool
version bool
// -f / --skip-fields: number of leading fields excluded from comparison; -1 means unset.
skip_fields int
// -s / --skip-chars: number of leading characters excluded from comparison; -1 means unset.
skip_chars int
// Record separator: `\n` by default, `\0` when -z / --zero-terminated is given.
line_delimiter u8
// Input path; '-' selects standard input.
input_file string
// Output path; '-' selects standard output.
output_file string
}

// args builds the Settings for this invocation from os.args.
// Options are registered in a fixed order on the shared flag parser; the
// remaining positional operands are interpreted as [input_file [output_file]].
// More than two operands terminates the program through fail().
fn args() Settings {
	mut parser := common.flag_parser(os.args)
	parser.application(app_name)
	parser.description(app_description)
	parser.footer("\nA field is a run of blanks (usually spaces and/or TABs), then non-blank\ncharacters. Fields are skipped before chars.\n\nNote: 'uniq' does not detect repeated lines unless they are adjacent.\nYou may want to sort the input first, or use 'sort -u' without 'uniq'.")

	// Both streams default to '-' (stdin / stdout) until operands say otherwise.
	mut opts := Settings{
		input_file: '-'
		output_file: '-'
	}
	opts.count = parser.bool('count', `c`, false, 'prefix lines by the number of occurrences')
	opts.repeated = parser.bool('repeated', `d`, false, 'only print duplicate lines, one for each group')
	opts.unique = parser.bool('unique', `u`, false, 'only print unique lines')
	opts.case_insensitive = parser.bool('ignore-case', `i`, false, 'ignore differences in case when comparing')
	opts.check_chars = parser.int('check-chars', `w`, -1, 'compare no more than N characters in lines')
	opts.skip_fields = parser.int('skip-fields', `f`, -1, 'avoid comparing the first N fields')
	opts.skip_chars = parser.int('skip-chars', `s`, -1, 'avoid comparing the first N characters')

	nul_delimited := parser.bool('zero-terminated', `z`, false, 'line delimiter is NUL, not newline')
	opts.line_delimiter = `\n`
	if nul_delimited {
		opts.line_delimiter = `\0`
	}

	operands := parser.remaining_parameters()

	// Validation
	if operands.len > 2 {
		// Exits the program
		fail('Too many arguments specified')
	}
	if operands.len >= 1 {
		opts.input_file = operands[0]
	}
	if operands.len == 2 {
		opts.output_file = operands[1]
	}
	return opts
}
133 changes: 133 additions & 0 deletions src/uniq/uniq.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
module main

import io
import math
import os

// POSIX Spec: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/uniq.html
// app_name is passed to the flag parser in args() and prefixes messages printed by fail().
const app_name = 'uniq'
// app_description is the one-line summary passed to the flag parser in args().
const app_description = 'report or omit repeated lines'

// Buffer tracks the group of adjacent matching lines currently being accumulated.
struct Buffer {
mut:
// First line of the current group; this is the text output_line() writes.
seen string
// Number of lines in the current group; 0 means no line has been buffered yet.
count int
}

// fail prints `<app_name>: <message>` to standard error and terminates the
// process with exit status 1. It never returns.
@[noreturn]
fn fail(message string) {
	eprintln(app_name + ': ' + message)
	exit(1)
}

// output_line writes the buffered group `s` to `outfile` if the active filters
// allow it, followed by the configured line delimiter.
//
// Selection rules:
//   - neither -u nor -d: every group is written;
//   - -u: groups that occurred exactly once are written;
//   - -d: groups that occurred more than once are written.
// With -c the line is prefixed by its count, right-aligned in 7 columns.
// An empty buffer (count == 0) writes nothing. Always returns true on
// success; write errors are propagated to the caller.
fn output_line(s Buffer, settings Settings, mut outfile os.File) !bool {
	// Nothing has been buffered yet.
	if s.count <= 0 {
		return true
	}

	occurs_once := s.count == 1
	filtering := settings.unique || settings.repeated
	selected := !filtering || (settings.unique && occurs_once)
		|| (settings.repeated && !occurs_once)
	if !selected {
		return true
	}

	if settings.count {
		outfile.write('${s.count:7} '.bytes())!
	}
	outfile.write('${s.seen}'.bytes())!
	outfile.write(rune(settings.line_delimiter).bytes())!
	return true
}

// get_start_of_field returns the byte index at which the `target_field`-th
// (1-based) whitespace-separated field of `source` begins.
// If `source` has fewer than `target_field` fields (or `target_field` is less
// than 1), `source.len` is returned.
fn get_start_of_field(source string, target_field int) int {
	mut fields_seen := 0
	mut inside_field := false
	for idx, ch in source {
		if ch.is_space() {
			inside_field = false
			continue
		}
		if inside_field {
			continue
		}
		// First non-blank byte after a run of blanks (or at line start):
		// a new field begins here.
		inside_field = true
		fields_seen++
		// If we skip n fields, we want the start of field n+1
		if fields_seen == target_field {
			return idx
		}
	}
	return source.len
}

// compare reports whether `source` and `target` count as the same line under
// the comparison options in `settings`.
//
// The options are applied cumulatively, in this order:
//   1. skip_fields: drop the first N blank-separated fields;
//   2. skip_chars:  drop the next N characters of what remains;
//   3. check_chars: keep at most N characters of what remains;
//   4. case_insensitive: compare lower-cased copies.
// A value of -1 for any of the numeric options disables that step.
fn compare(source string, target string, settings Settings) bool {
	mut s1 := source
	mut s2 := target

	// Skipping zero fields must leave the line untouched (including any
	// leading blanks), so only values above 0 trigger field skipping.
	if settings.skip_fields > 0 {
		s1 = s1[get_start_of_field(s1, settings.skip_fields + 1)..]
		s2 = s2[get_start_of_field(s2, settings.skip_fields + 1)..]
	}

	// Each step slices the result of the previous one (s1/s2), not the
	// original line, so that fields are skipped before chars and the
	// character limit applies to what is left.
	if settings.skip_chars > -1 {
		s1 = s1[math.min(s1.len, settings.skip_chars)..]
		s2 = s2[math.min(s2.len, settings.skip_chars)..]
	}

	if settings.check_chars > -1 {
		s1 = s1[0..math.min(s1.len, settings.check_chars)]
		s2 = s2[0..math.min(s2.len, settings.check_chars)]
	}

	if settings.case_insensitive {
		return s1.to_lower() == s2.to_lower()
	} else {
		return s1 == s2
	}
}

// uniq reads delimiter-separated records from the configured input, collapses
// runs of adjacent records that compare() considers equal, and passes each
// run to output_line() for filtering and writing.
//
// An input or output path of '-' selects stdin or stdout respectively.
// Failure to open either file terminates the program through fail();
// a write error panics.
fn uniq(settings Settings) {
	mut file := os.File{}
	mut outfile := os.File{}
	if settings.input_file == '-' {
		file = os.stdin()
	} else {
		file = os.open(settings.input_file) or {
			fail('${settings.input_file}: No such file or directory')
		}
	}
	defer {
		file.close()
	}

	if settings.output_file == '-' {
		outfile = os.stdout()
	} else {
		outfile = os.create(settings.output_file) or {
			fail('${settings.output_file}: No such file or directory')
		}
	}
	defer {
		outfile.close()
	}

	mut br := io.new_buffered_reader(io.BufferedReaderConfig{ reader: file })
	defer {
		br.free()
	}

	mut s := Buffer{
		seen: ''
		count: 0
	}
	for {
		line := br.read_line(delim: settings.line_delimiter) or { break }
		// While count is 0 nothing is buffered, so the first record always
		// starts a new group. Comparing it against the empty placeholder
		// would wrongly merge it (and lose its text) whenever its compared
		// portion is empty, e.g. a one-field line with -f 1.
		if s.count == 0 || !compare(line, s.seen, settings) {
			output_line(s, settings, mut &outfile) or { panic(err) }
			s.seen = line
			s.count = 1
		} else {
			s.count += 1
		}
	}
	// Flush the final group.
	output_line(s, settings, mut &outfile) or { panic(err) }
}

// main parses the command line and runs uniq with the resulting settings.
fn main() {
	settings := args()
	uniq(settings)
}
77 changes: 77 additions & 0 deletions src/uniq/uniq_test.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import common.testing
import os

// Name of the utility under test.
const util = 'uniq'
// Command used as the reference implementation: the bare utility name on
// non-Windows platforms, 'coreutils uniq' on Windows.
const platform_util = $if !windows {
util
} $else {
'coreutils ${util}'
}

// Pairs the reference command with the executable built from this repository.
// NOTE(review): the exact comparison semantics live in common.testing — confirm there.
const cmd = testing.new_paired_command(platform_util, executable_under_test)
const executable_under_test = testing.prepare_executable(util)
// NOTE(review): temp_dir is not referenced by the tests visible here.
const temp_dir = testing.temp_folder

// Seven input records used by the POSIX spec test cases below.
// Records #05 and #06 are identical after the first field; #04 has no other fields.
const posix_test_data = [
'#01 foo0 bar0 foo1 bar1',
'#02 bar0 foo1 bar1 foo1',
'#03 foo0 bar0 foo1 bar1',
'#04',
'#05 foo0 bar0 foo1 bar1',
'#06 foo0 bar0 foo1 bar1',
'#07 bar0 foo1 bar1 foo0',
]
// File holding posix_test_data joined with '\n' (written by testsuite_begin).
const posix_test_path_newline = 'posix_nl.txt'
// File holding posix_test_data joined with '\0' (written by testsuite_begin).
const posix_test_path_zeroterm = 'posix_zt.txt'

// call_for_test runs the executable under test with `args`, asserts that it
// exited with status 0, and returns the captured result.
fn call_for_test(args string) os.Result {
	command_line := '${executable_under_test} ${args}'
	outcome := os.execute(command_line)
	assert outcome.exit_code == 0
	return outcome
}

// Counted output while ignoring the first field, checked against the reference command.
fn test_posix_spec_case_1() {
	matches_reference := cmd.same_results('-c -f 1 posix_nl.txt')
	assert matches_reference
}

// Repeated groups only, ignoring the first field, checked against the reference command.
fn test_posix_spec_case_2() {
	matches_reference := cmd.same_results('-d -f 1 posix_nl.txt')
	assert matches_reference
}

// Unique groups only, ignoring the first field, checked against the reference command.
fn test_posix_spec_case_3() {
	matches_reference := cmd.same_results('-u -f 1 posix_nl.txt')
	assert matches_reference
}

// Repeated groups only, ignoring the first two characters, checked against the reference command.
fn test_posix_spec_case_4() {
	matches_reference := cmd.same_results('-d -s 2 posix_nl.txt')
	assert matches_reference
}

// Zero-terminated variant of case 1: counted output, ignoring the first field.
// The -z flag is required so the NUL-separated file is actually split into
// records; without it the whole file is treated as a single line.
// Six groups are expected (#05 and #06 merge), each followed by a NUL, so
// splitting on NUL yields 7 parts (the last one empty).
fn test_posix_spec_case_1_zero_term() {
	assert call_for_test('-c -f 1 -z posix_zt.txt').output.split('\0').len == 7
}

// Zero-terminated variant of case 2: one repeated group is expected, so
// splitting the output on NUL yields 2 parts.
fn test_posix_spec_case_2_zero_term() {
	result := call_for_test('-d -f 1 -z posix_zt.txt')
	parts := result.output.split('\0')
	assert parts.len == 2
}

// Zero-terminated variant of case 3: five unique groups are expected, so
// splitting the output on NUL yields 6 parts.
fn test_posix_spec_case_3_zero_term() {
	result := call_for_test('-u -f 1 -z posix_zt.txt')
	parts := result.output.split('\0')
	assert parts.len == 6
}

// Zero-terminated variant of case 4: no repeated groups are expected, so the
// output is empty and splitting on NUL yields a single part.
fn test_posix_spec_case_4_zero_term() {
	result := call_for_test('-d -s 2 -z posix_zt.txt')
	parts := result.output.split('\0')
	assert parts.len == 1
}

// testsuite_begin writes the shared fixture files before any test runs:
// one with newline-separated records and one with NUL-separated records.
fn testsuite_begin() {
	newline_payload := posix_test_data.join('\n')
	os.write_file(posix_test_path_newline, newline_payload)!
	nul_payload := posix_test_data.join('\0')
	os.write_file(posix_test_path_zeroterm, nul_payload)!
}

// testsuite_end removes the fixture files created by testsuite_begin.
fn testsuite_end() {
	for fixture in [posix_test_path_newline, posix_test_path_zeroterm] {
		os.rm(fixture)!
	}
}

// Delegates to the shared helper on the paired command.
// NOTE(review): presumably verifies that --help and --version behave like the
// reference utility; the helper is defined in common.testing — confirm there.
fn test_help_and_version() {
cmd.ensure_help_and_version_options_work()!
}

0 comments on commit b26640e

Please sign in to comment.