Skip to content

Commit

Permalink
sort implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
mike-ward committed Jul 2, 2024
1 parent 0d975f2 commit d906022
Show file tree
Hide file tree
Showing 8 changed files with 856 additions and 2 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ compare against the true GNU coreutils version on the Linux-based tests first.
| | link | Make a hard link via the link syscall |
| ✓ | ln | Make links between files |
| ✓ | logname | Print current login name |
| ✓ | ls | List directory contents |
| | ls | List directory contents |
| ✓ | md5sum | Print or check MD5 digests |
| ✓ | mkdir | Make directories |
| | mkfifo | Make FIFOs (named pipes) |
Expand Down Expand Up @@ -130,7 +130,7 @@ compare against the true GNU coreutils version on the Linux-based tests first.
| | shred | Remove files more securely |
| ✓ | shuf | Shuffling text |
| ✓ | sleep | Delay for a specified time |
| | sort | Sort text files |
| ✓ | sort | Sort text files |
| | split | Split a file into pieces |
| ✓ | stat | Report file or file system status |
| | stdbuf | Run a command with modified I/O stream buffering |
Expand Down
Empty file removed src/sort/delete.me
Empty file.
129 changes: 129 additions & 0 deletions src/sort/options.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import common
import flag
import os
import time

const app_name = 'sort'

struct Options {
ignore_leading_blanks bool
dictionary_order bool
ignore_case bool
ignore_non_printing bool
numeric bool
reverse bool
// other optoins
check_diagnose bool
check_quiet bool
sort_keys []string
field_separator string = ' '
merge bool
output_file string
unique bool
files []string
}

fn get_options() Options {
mut fp := flag.new_flag_parser(os.args)
fp.application(app_name)
fp.version(common.coreutils_version())
fp.skip_executable()
fp.arguments_description('[FILE]')
fp.description('\nWrite sorted concatenation of all FILE(s) to standard output.' +
'\nWith no FILE, or when FILE is -, read standard input.')

ignore_leading_blanks := fp.bool('ignore-leading-blanks', `b`, false, 'ignore leading blanks')
dictionary_order := fp.bool('dictionary-order', `d`, false, 'consider only blanks and alphanumeric characters')
ignore_case := fp.bool('ignore-case', `f`, false, 'fold lower case to upper case characters')
ignore_non_printing := fp.bool('ignore-non-printing', `i`, false, 'consider only printable characters')
numeric := fp.bool('numeric-sort', `n`, false,
'Restrict the sort key to an initial numeric\n${flag.space}' +
'string, consisting of optional <blank> characters,\n${flag.space}' +
'optional <hyphen-minus> character, and zero or\n${flag.space}' +
'more digits, which shall be sorted by arithmetic\n${flag.space}' +
'value. An empty digit string shall be treated as\n${flag.space}' +
'zero. Leading zeros shall not affect ordering.')
reverse := fp.bool('reverse', `r`, false, 'reverse the result of comparisons\n\nOther options:')

check_diagnose := fp.bool('', `c`, false, 'check for sorted input; do not sort')
check_quiet := fp.bool('', `C`, false, 'like -c, but do not report first bad line')
sort_keys := fp.string_multi('key', `k`, 'sort via a key(s); <string> gives location and type')
merge := fp.bool('merge', `m`, false, 'merge already sorted files; do not sort')
field_separator := fp.string('', `t`, ' ', 'use <string> as field separator')
output_file := fp.string('output', `o`, '', 'write result to FILE instead of standard output')
unique := fp.bool('unique', `u`, false, 'with -c, check for strict ordering;\n${flag.space}' +
'without -c, output only the first of an equal run')

fp.footer("
KEYDEF is F[.C][OPTS][,F[.C][OPTS]] for start and stop position,
where F is a field number and C a character position in the
field; both are origin 1, and the stop position defaults to the
line's end. If neither -t nor -b is in effect, characters in a
field are counted from the beginning of the preceding whitespace.
OPTS is one or more single-letter ordering options [bdfir], which
override global ordering options for that key. If no key is
given, use the entire line as the key.".trim_indent())

fp.footer(common.coreutils_footer())
files := fp.finalize() or { exit_error(err.msg()) }

return Options{
ignore_leading_blanks: ignore_leading_blanks
dictionary_order: dictionary_order
ignore_case: ignore_case
ignore_non_printing: ignore_non_printing
numeric: numeric
reverse: reverse
// other options
check_diagnose: check_diagnose
check_quiet: check_quiet
sort_keys: sort_keys
field_separator: field_separator
merge: merge
output_file: output_file
unique: unique
files: scan_files_arg(files)
}
}

fn scan_files_arg(files_arg []string) []string {
mut files := []string{}
for file in files_arg {
if file == '-' {
files << stdin_to_tmp()
continue
}
files << file
}
if files.len == 0 {
files << stdin_to_tmp()
}
return files
}

const tmp_pattern = '/${app_name}-tmp-'

fn stdin_to_tmp() string {
tmp := '${os.temp_dir()}/${tmp_pattern}${time.ticks()}'
os.create(tmp) or { exit_error(err.msg()) }
mut f := os.open_append(tmp) or { exit_error(err.msg()) }
defer { f.close() }
for {
s := os.get_raw_line()
if s.len == 0 {
break
}
f.write_string(s) or { exit_error(err.msg()) }
}
return tmp
}

@[noreturn]
fn exit_error(msg string) {
if msg.len > 0 {
eprintln('${app_name}: ${error}')
}
eprintln("Try '${app_name} --help' for more information.")
exit(2) // exit(1) is used with the -c option
}
177 changes: 177 additions & 0 deletions src/sort/sort.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import os
import arrays
import strconv

const space = ` `
const tab = `\t`

fn main() {
options := get_options()
results := sort(options)
if options.output_file == '' {
for result in results {
println(result)
}
} else {
os.write_lines(options.output_file, results) or { exit_error(err.msg()) }
}
}

fn sort(options Options) []string {
mut results := []string{}
for file in options.files {
results << do_sort(file, options)
}
return results
}

fn do_sort(file string, options Options) []string {
mut lines := os.read_lines(file) or { exit_error(err.msg()) }
original := if options.check_diagnose || options.check_quiet {
lines.clone()
} else {
[]string{}
}
match true {
// order matters here
options.sort_keys.len > 0 { sort_key(mut lines, options) }
options.numeric { sort_general_numeric(mut lines, options) }
options.ignore_case { sort_ignore_case(mut lines, options) }
options.dictionary_order { sort_dictionary_order(mut lines, options) }
options.ignore_non_printing { sort_ignore_non_printing(mut lines, options) }
options.ignore_leading_blanks { sort_ignore_leading_blanks(mut lines, options) }
else { sort_lines(mut lines, options) }
}
if options.unique {
lines = arrays.distinct(lines)
}
if original.len > 0 {
if lines != original {
if options.check_diagnose {
println('sort: not sorted')
}
exit(1)
} else {
if options.check_diagnose {
println('sort: already sorted')
}
exit(0)
}
}
return lines
}

fn sort_lines(mut lines []string, options Options) {
cmp := if options.reverse { compare_strings_reverse } else { compare_strings }
lines.sort_with_compare(fn [cmp] (a &string, b &string) int {
return cmp(a, b)
})
}

fn compare_strings_reverse(a &string, b &string) int {
return compare_strings(b, a)
}

fn sort_ignore_case(mut lines []string, options Options) {
lines.sort_ignore_case()
if options.reverse {
lines.reverse_in_place()
}
}

// Ignore leading blanks when finding sort keys in each line.
// By default a blank is a space or a tab
fn sort_ignore_leading_blanks(mut lines []string, options Options) {
cmp := if options.reverse { compare_strings_reverse } else { compare_strings }
lines.sort_with_compare(fn [cmp] (a &string, b &string) int {
return cmp(trim_leading_spaces(a), trim_leading_spaces(b))
})
}

fn trim_leading_spaces(s string) string {
return s.trim_left(' \n\t\v\f\r')
}

// Sort in phone directory order: ignore all characters except letters, digits
// and blanks when sorting. By default letters and digits are those of ASCII
fn sort_dictionary_order(mut lines []string, options Options) {
cmp := if options.reverse { compare_strings_reverse } else { compare_strings }
lines.sort_with_compare(fn [cmp] (a &string, b &string) int {
aa := a.bytes().map(is_dictionary_char).bytestr()
bb := b.bytes().map(is_dictionary_char).bytestr()
return cmp(aa, bb)
})
}

fn is_dictionary_char(e u8) u8 {
return match e.is_digit() || e.is_letter() || e == space || e == tab {
true { e }
else { space }
}
}

// Sort numerically, converting a prefix of each line to a long double-precision
// floating point number. See Floating point numbers. Do not report overflow,
// underflow, or conversion errors. Use the following collating sequence:
// Lines that do not start with numbers (all considered to be equal).
// - NaNs (“Not a Number” values, in IEEE floating point arithmetic) in a
// consistent but machine-dependent order.
// - Minus infinity.
// - Finite numbers in ascending numeric order (with -0 and +0 equal).
// - Plus infinity
fn sort_general_numeric(mut lines []string, options Options) {
cmp := if options.reverse { compare_strings_reverse } else { compare_strings }
lines.sort_with_compare(fn [cmp, options] (a &string, b &string) int {
numeric_a, rest_a := numeric_rest(a)
numeric_b, rest_b := numeric_rest(b)
numeric_diff := if options.reverse { numeric_b - numeric_a } else { numeric_a - numeric_b }
return if numeric_diff != 0 {
if numeric_diff > 0 { 1 } else { -1 }
} else {
cmp(rest_a, rest_b)
}
})
}

const minus_infinity = f64(-0xFFFFFFFFFFFFFFF)

fn numeric_rest(s string) (f64, string) {
mut num := 0.0
mut rest := s
mut allow_blanks := true
mut allow_sign := true
for i := 0; i < s.len; i++ {
c := s[i]
if allow_blanks && c == space {
continue
}
if allow_sign && (c == `-` || c == `+`) {
allow_sign = false
allow_blanks = false
continue
}
if c.is_digit() || c == strconv.c_dpoint {
allow_sign = false
allow_blanks = false
continue
}
num = strconv.atof64(s[0..i]) or { minus_infinity }
rest = s[i..].clone()
}
return num, rest
}

// This option has no effect if the stronger --dictionary-order (-d) option
// is also given.
fn sort_ignore_non_printing(mut lines []string, options Options) {
cmp := if options.reverse { compare_strings_reverse } else { compare_strings }
lines.sort_with_compare(fn [cmp] (a &string, b &string) int {
aa := a.bytes().map(is_printable).bytestr()
bb := b.bytes().map(is_printable).bytestr()
return cmp(aa, bb)
})
}

fn is_printable(e u8) u8 {
return if e >= u8(` `) && e <= u8(`~`) { e } else { space }
}
Loading

0 comments on commit d906022

Please sign in to comment.