diff --git a/tools/xls2tsv/.shed.yml b/tools/xls2tsv/.shed.yml
new file mode 100644
index 00000000..fbac33a2
--- /dev/null
+++ b/tools/xls2tsv/.shed.yml
@@ -0,0 +1,8 @@
+categories:
+  - Text Manipulation
+description: Convert an xlsx file to a tabular
+long_description: Extract one sheet from an xlsx/xls file and convert it to a tabular format
+name: xlsx2tsv
+owner: ufz
+homepage_url: https://github.com/Helmholtz-UFZ/galaxy-tools
+remote_repository_url: https://github.com/Helmholtz-UFZ/galaxy-tools/tree/main/tools/xls2tsv
diff --git a/tools/xls2tsv/test-data/excel_test.xlsx b/tools/xls2tsv/test-data/excel_test.xlsx
new file mode 100644
index 00000000..b9428aa9
Binary files /dev/null and b/tools/xls2tsv/test-data/excel_test.xlsx differ
diff --git a/tools/xls2tsv/test-data/output_sheet_1.tsv b/tools/xls2tsv/test-data/output_sheet_1.tsv
new file mode 100644
index 00000000..7c040524
--- /dev/null
+++ b/tools/xls2tsv/test-data/output_sheet_1.tsv
@@ -0,0 +1,4 @@
+column0	column1
+test1	value1
+test2	value2
+test3	value3
diff --git a/tools/xls2tsv/test-data/output_sheet_2.tsv b/tools/xls2tsv/test-data/output_sheet_2.tsv
new file mode 100644
index 00000000..b4ddb905
--- /dev/null
+++ b/tools/xls2tsv/test-data/output_sheet_2.tsv
@@ -0,0 +1,4 @@
+column2	column3
+test4	value4
+test5	value5
+test6	value6
diff --git a/tools/xls2tsv/xlsx2tsv.py b/tools/xls2tsv/xlsx2tsv.py
new file mode 100644
index 00000000..37b3bcaf
--- /dev/null
+++ b/tools/xls2tsv/xlsx2tsv.py
@@ -0,0 +1,44 @@
+import argparse
+import sys
+
+import pandas as pd
+
+
+def convert_xlsx_to_tsv(input_file, sheet_name, output):
+    """Write one sheet of an Excel workbook to a tab-separated file.
+
+    Args:
+        input_file: Path to the input workbook.
+        sheet_name: Name of the single sheet to extract.
+        output: Path of the tab-separated file to write.
+
+    Raises:
+        SystemExit: If reading the sheet or writing the output fails. The
+            error is reported on stderr with a non-zero exit status, so a
+            failed conversion is never mistaken for success.
+    """
+    try:
+        df = pd.read_excel(input_file, sheet_name=sheet_name)
+        df.to_csv(output, sep='\t', index=False)
+    except Exception as e:
+        # sys.exit() with a string prints it to stderr and exits with status 1.
+        sys.exit(f"Failed to convert sheet '{sheet_name}' from {input_file}: {e}")
+
+    print(f"Extracted sheet '{sheet_name}' from {input_file}")
+
+
+def main():
+    """Parse the command line and convert the requested sheet."""
+    parser = argparse.ArgumentParser(description="Convert one sheet of an .xlsx file to .tsv format.")
+    parser.add_argument("--input-file", type=str, required=True, help="Path to the input .xlsx file.")
+    # The option name stays plural for existing callers, but the value is
+    # passed to pandas unchanged, i.e. it is treated as one sheet name.
+    parser.add_argument("--sheet-names", type=str, required=True, help="Name of the sheet to convert.")
+    parser.add_argument("--output", type=str, default="extracted_sheet.tsv", required=False, help="Path of the output .tsv file.")
+    args = parser.parse_args()
+
+    convert_xlsx_to_tsv(args.input_file, args.sheet_names, args.output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/xls2tsv/xlsx2tsv.xml b/tools/xls2tsv/xlsx2tsv.xml
new file mode 100644
index 00000000..d46e7ab9
--- /dev/null
+++ b/tools/xls2tsv/xlsx2tsv.xml
@@ -0,0 +1,50 @@
+
+ with pandas
+
+ pandas
+ openpyxl
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Description
+-----------
+Extract a sheet from XLS/XLSX file to a tabular file
+
+
+ 10.5281/zenodo.13819579
+
+
\ No newline at end of file