Skip to content

Commit

Permalink
update documentation
Browse files Browse the repository at this point in the history
Signed-off-by: Caleb Grant <[email protected]>
  • Loading branch information
geocoug committed Aug 1, 2024
1 parent 9d7cec0 commit 516bcee
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 16 deletions.
11 changes: 5 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ Go from this:
To this:

![Crosstab Output](https://raw.githubusercontent.com/geocoug/crosstab/main/crosstab-output.png)
![Crosstab Metadata](https://raw.githubusercontent.com/geocoug/crosstab/main/crosstab-metadata.png)

## Installation

Expand Down Expand Up @@ -47,9 +46,9 @@ from crosstab import Crosstab
Crosstab(
incsv=Path("data.csv"),
outxlsx=Path("crosstabbed_data.xlsx"),
row_headers=("location_id", "sample_id", "sample_date"),
col_headers=("parameter", "cas_rn"),
value_cols=("result", "unit", "qualifiers"),
row_headers=("location", "sample"),
col_headers=("cas_rn", "parameter"),
value_cols=("concentration", "units"),
keep_sqlite=True,
keep_src=True,
).crosstab()
Expand All @@ -58,11 +57,11 @@ Crosstab(
### Command Line

```bash
crosstab -k -s -f data.csv -o crosstabbed_data.xlsx -r location_id sample_id sample_date -c parameter cas_rn -v result unit qualifiers
crosstab -k -s -f data.csv -o crosstabbed_data.xlsx -r location sample -c cas_rn parameter -v concentration units
```

### Docker

```bash
docker run --rm -v $(pwd):/data ghcr.io/geocoug/crosstab:latest -k -s -f /data/data.csv -o /data/crosstabbed_data.xlsx -r location_id sample_id sample_date -c parameter cas_rn -v result unit qualifiers
docker run --rm -v $(pwd):/data ghcr.io/geocoug/crosstab:latest -k -s -f /data/data.csv -o /data/crosstabbed_data.xlsx -r location sample -c cas_rn parameter -v concentration units
```
22 changes: 12 additions & 10 deletions crosstab/crosstab.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,16 +116,15 @@ def __init__(
Args:
incsv (Path): Path to the input CSV file.
outxlsx (Path): Path to the output XLSX file.
row_headers (tuple): Tuple of column headers to use as row headers.
col_headers (tuple): Tuple of column headers to use as column headers.
value_cols (tuple): Tuple of column headers to use as value columns.
keep_sqlite (bool, optional): Keep the SQLite database file. Defaults to False.
keep_src (bool, optional): Keep the raw data sheet in the output file. Defaults
to False.
outxlsx (Path): Path to the output XLSX file. The output file will contain, at a minimum, two sheets: one containing metadata about the crosstab and one containing the crosstab table. If the keep_src argument is True, the output file will contain a third sheet with the source data.
row_headers (tuple): Tuple of one or more column names to use as row headers. Unique values of these columns will appear at the beginning of every output line.
col_headers (tuple): Tuple of one or more column names to use as column headers in the output. A crosstab column (or columns) will be created for every unique combination of values of these fields in the input.
value_cols (tuple): Tuple of one or more column names with values to be used to fill the cells of the cross-table. If n column names are specified, then there will be n columns in the output table for each of the column headers corresponding to values of the -c argument. The column names specified with the -v argument will be appended to the output column headers created from values of the -c argument. There should be only one value of the -v column(s) for each combination of the -r and -c columns; if there is more than one, a warning will be printed and only the first value will appear in the output. (That is, values are not combined in any way when there are multiple values for each output cell.)
keep_sqlite (bool, optional): Keep the temporary SQLite database file. The default is to delete it after the output file is created. The SQLite file is created in the same directory as the output file with the name of the output file (but with a .sqlite extension) and a single table named 'data'. Defaults to False.
keep_src (bool, optional): Keep a sheet with the source data in the output file. The sheet will be named 'Source Data'. Defaults to False.
Raises:
ValueError: Raised if the input file does not exist, is not a file, is empty, is not a CSV file, or if the row_headers, col_headers, or value_cols are not specified.
ValueError: Raised if the input file does not exist, is not a file, is empty, is not a CSV file, or if the row_headers, col_headers, or value_cols are not specified. Also raised if the output file does not have an XLSX extension.
Example:
Expand Down Expand Up @@ -254,7 +253,10 @@ def _csv_to_sqlite(self: Crosstab) -> sqlite3.Connection:
return conn

def crosstab(self: Crosstab) -> None:
"""Create a crosstab table from the input CSV file."""
"""Create a crosstab table from the input CSV file.
The crosstab table will be written to the output XLSX file. The table will have row headers, column headers, and value columns as specified in the `row_headers`, `col_headers`, and `value_cols` arguments. The table will be written to a sheet named *Crosstab*. If the `keep_src` argument is `True`, a sheet named *Source Data* will be created with the source data from the input CSV file. A sheet named *README* will be created with metadata about the crosstab process. The metadata will include the creation time, user, script version, input file, output file, and SQLite file (if the `keep_sqlite` argument is `True`). Both the *README* and *Crosstab* sheets will be styled to make the table easier to read.
""" # noqa: E501
logger.debug("Starting crosstab routine.")

# Get list of unique values for each row header
Expand Down Expand Up @@ -495,7 +497,7 @@ def clparser() -> argparse.ArgumentParser:
"-s",
"--keep-src",
action="store_true",
help="keep a sheet with the raw data in the output file. The default is to not include the raw data in the output file.", # noqa: E501
help="keep a sheet with the source data in the output file. The default is to not include the source data in the output file.", # noqa: E501
)
parser.add_argument(
"-f",
Expand Down

0 comments on commit 516bcee

Please sign in to comment.