From 0918824affd53b825ace51e7b55ee98f560765b0 Mon Sep 17 00:00:00 2001 From: Ujjwal Kumar Date: Thu, 28 Nov 2024 16:06:40 +0530 Subject: [PATCH 1/2] renamed --- {boxtodocx/.github => .github}/README.md | 0 .../.github => .github}/workflows/ci.yml | 0 .gitignore | 7 +- boxtodocx/CHANGELOG.md => CHANGELOG.md | 4 +- boxtodocx/CONTRIBUTING.md => CONTRIBUTING.md | 10 +- LICENSE | 21 --- boxtodocx/MANIFEST.in => MANIFEST.in | 2 +- README.md | 69 +++++--- boxtodocx/Test.boxnote => Test.boxnote | 0 boxtodocx/.gitignore | 163 ------------------ boxtodocx/README.md | 62 ------- boxtodocx/src/boxtodocx/handlers/Readme.md | 3 - boxtodocx/src/boxtodocx/handlers/__init__.py | 5 - boxtodocx/src/boxtodocx/mappers/Readme.md | 3 - ...quirements-dev.txt => requirements-dev.txt | 0 .../requirements.txt => requirements.txt | 0 boxtodocx/setup.py => setup.py | 12 +- {boxtodocx/src => src}/README.md | 2 +- {boxtodocx/src => src}/boxtodocx/__init__.py | 2 +- {boxtodocx/src => src}/boxtodocx/__main__.py | 4 +- {boxtodocx/src => src}/boxtodocx/cli.py | 6 +- {boxtodocx/src => src}/boxtodocx/converter.py | 24 +-- src/boxtodocx/handlers/Readme.md | 3 + src/boxtodocx/handlers/__init__.py | 5 + .../boxtodocx/handlers/docx_handler.py | 6 +- .../boxtodocx/handlers/html_handler.py | 4 +- src/boxtodocx/mappers/Readme.md | 3 + .../src => src}/boxtodocx/mappers/__init__.py | 2 +- .../boxtodocx/mappers/html_mapper.py | 2 +- .../src => src}/boxtodocx/utils/Readme.md | 0 .../src => src}/boxtodocx/utils/__init__.py | 0 .../src => src}/boxtodocx/utils/logger.py | 0 {boxtodocx/tests => tests}/README.md | 0 {boxtodocx/tests => tests}/__init__.py | 0 34 files changed, 101 insertions(+), 323 deletions(-) rename {boxtodocx/.github => .github}/README.md (100%) rename {boxtodocx/.github => .github}/workflows/ci.yml (100%) rename boxtodocx/CHANGELOG.md => CHANGELOG.md (87%) rename boxtodocx/CONTRIBUTING.md => CONTRIBUTING.md (83%) delete mode 100644 LICENSE rename boxtodocx/MANIFEST.in => MANIFEST.in (85%) rename boxtodocx/Test.boxnote => Test.boxnote (100%) delete mode 100644 boxtodocx/.gitignore delete mode 100644 boxtodocx/README.md delete mode 100644 boxtodocx/src/boxtodocx/handlers/Readme.md delete mode 100644 boxtodocx/src/boxtodocx/handlers/__init__.py delete mode 100644 boxtodocx/src/boxtodocx/mappers/Readme.md rename boxtodocx/requirements-dev.txt => requirements-dev.txt (100%) rename boxtodocx/requirements.txt => requirements.txt (100%) rename boxtodocx/setup.py => setup.py (78%) rename {boxtodocx/src => src}/README.md (83%) rename {boxtodocx/src => src}/boxtodocx/__init__.py (67%) rename {boxtodocx/src => src}/boxtodocx/__main__.py (58%) rename {boxtodocx/src => src}/boxtodocx/cli.py (92%) rename {boxtodocx/src => src}/boxtodocx/converter.py (94%) create mode 100644 src/boxtodocx/handlers/Readme.md create mode 100644 src/boxtodocx/handlers/__init__.py rename {boxtodocx/src => src}/boxtodocx/handlers/docx_handler.py (99%) rename {boxtodocx/src => src}/boxtodocx/handlers/html_handler.py (98%) create mode 100644 src/boxtodocx/mappers/Readme.md rename {boxtodocx/src => src}/boxtodocx/mappers/__init__.py (81%) rename {boxtodocx/src => src}/boxtodocx/mappers/html_mapper.py (99%) rename {boxtodocx/src => src}/boxtodocx/utils/Readme.md (100%) rename {boxtodocx/src => src}/boxtodocx/utils/__init__.py (100%) rename {boxtodocx/src => src}/boxtodocx/utils/logger.py (100%) rename {boxtodocx/tests => tests}/README.md (100%) rename {boxtodocx/tests => tests}/__init__.py (100%) diff --git a/boxtodocx/.github/README.md b/.github/README.md similarity index 100% rename from boxtodocx/.github/README.md rename to .github/README.md diff --git a/boxtodocx/.github/workflows/ci.yml b/.github/workflows/ci.yml similarity index 100% rename from boxtodocx/.github/workflows/ci.yml rename to .github/workflows/ci.yml diff --git a/.gitignore b/.gitignore index 82f9275..eb320fe 100644 --- a/.gitignore +++ b/.gitignore @@ -106,10 +106,8 @@ ipython_config.py #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. -# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +# https://pdm.fming.dev/#use-with-ide .pdm.toml -.pdm-python -.pdm-build/ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ @@ -160,3 +158,6 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +path/* +*.docx diff --git a/boxtodocx/CHANGELOG.md b/CHANGELOG.md similarity index 87% rename from boxtodocx/CHANGELOG.md rename to CHANGELOG.md index a3d9360..6c9acca 100644 --- a/boxtodocx/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,12 +7,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [1.0.0] - 2024-11-28 ### Added -- Initial release of boxnotetodocx +- Initial release of boxtodocx - Command-line interface for converting BoxNote files - Support for single file conversion - Support for batch directory conversion - Box API integration for image downloading -- HTML to DOCX conversion with formatting preservation +- HTML to docx conversion with formatting preservation - Support for tables, lists, and images - Comprehensive error handling and logging - Documentation and contribution guidelines diff --git a/boxtodocx/CONTRIBUTING.md b/CONTRIBUTING.md similarity index 83% rename from boxtodocx/CONTRIBUTING.md rename to CONTRIBUTING.md index e40b8a8..4d7dc23 100644 --- a/boxtodocx/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,13 +1,13 @@ -# Contributing to BoxNote to DOCX Converter +# Contributing to BoxNote to docx Converter -Thank you for your interest in contributing to BoxNote to DOCX Converter! This document provides guidelines and instructions for contributing. +Thank you for your interest in contributing to BoxNote to docx Converter! This document provides guidelines and instructions for contributing. ## Development Setup 1. Clone the repository: ```bash - git clone git@github.com:ujjwal-ibm/boxnoteconvertor.git - cd boxnoteconvertor/boxnotetodocx + git clone git@github.com:ujjwal-ibm/boxtodocx.git + cd boxtodocx ``` 2. Create a virtual environment: @@ -47,7 +47,7 @@ pytest For test coverage: ```bash -pytest --cov=boxnotetodocx +pytest --cov=boxtodocx ``` ## Pull Request Process diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 8e5f43f..0000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2024 Ujjwal Kumar - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file diff --git a/boxtodocx/MANIFEST.in b/MANIFEST.in similarity index 85% rename from boxtodocx/MANIFEST.in rename to MANIFEST.in index 231dd1d..cddf416 100644 --- a/boxtodocx/MANIFEST.in +++ b/MANIFEST.in @@ -5,7 +5,7 @@ include CHANGELOG.md include requirements.txt include requirements-dev.txt -recursive-include src/boxnotetodocx * +recursive-include src/boxtodocx * recursive-exclude * __pycache__ recursive-exclude * *.py[cod] recursive-exclude tests * \ No newline at end of file diff --git a/README.md b/README.md index 263d14c..005283f 100644 --- a/README.md +++ b/README.md @@ -1,39 +1,62 @@ -# boxnoteconvertor -A tool to convert boxnotes to docx(for now) +# BoxNote to docx Converter +A Python command-line tool to convert Box Notes to Microsoft Word (docx) documents with preservation of formatting, tables, and images. -### **Getting Started with `boxtodocx`** +## Features -set up the core tool—`boxtodocx`. Follow these steps: +- Convert single BoxNote files to docx +- Batch convert entire directories of BoxNote files +- Preserve formatting, tables, and images +- Support for Box API authentication +- Simple command-line interface + +## Installation -#### **1. Clone the Repository** ```bash -git clone https://github.com/ujjwal-ibm/boxnoteconvertor.git + +pip3 install . ``` -#### **2. Navigate to the Tool Directory** +## Usage + +### Basic Usage + +Convert a single file: ```bash -cd boxnoteconvertor/boxtodocx +boxtodocx example.boxnote ``` -#### **3. Install the Tool** -Install the package using Python’s package manager: +Convert all files in a directory: ```bash -pip3 install . +boxtodocx /path/to/directory ``` -#### **4. Verify Installation** -You can now use the command-line tool: +### Advanced Options + ```bash -boxnotetodocx --help +boxtodocx --help + +Options: + -d, --dir TEXT Work directory for temporary files + -t, --token TEXT Box access token + -o, --output TEXT Output file name (only for single file conversion) + -u, --user TEXT Box user id + -v, --verbose Enable verbose logging + --help Show this message and exit ``` -#### **5. Conversion Examples** -- **Single File Conversion**: - ```bash - boxnotetodocx example.boxnote - ``` -- **Batch Conversion for a Directory**: - ```bash - boxnotetodocx /path/to/directory - ``` \ No newline at end of file +## Authentication + +To use Box API features (like image downloading), you need to provide a Box access token: + +```bash +boxtodocx input.boxnote -t "your_box_token" -u "your_user_id" +``` + +## Development + +See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and guidelines. + +## License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. \ No newline at end of file diff --git a/boxtodocx/Test.boxnote b/Test.boxnote similarity index 100% rename from boxtodocx/Test.boxnote rename to Test.boxnote diff --git a/boxtodocx/.gitignore b/boxtodocx/.gitignore deleted file mode 100644 index eb320fe..0000000 --- a/boxtodocx/.gitignore +++ /dev/null @@ -1,163 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ - -path/* -*.docx diff --git a/boxtodocx/README.md b/boxtodocx/README.md deleted file mode 100644 index ac18164..0000000 --- a/boxtodocx/README.md +++ /dev/null @@ -1,62 +0,0 @@ -# BoxNote to DOCX Converter - -A Python command-line tool to convert Box Notes to Microsoft Word (DOCX) documents with preservation of formatting, tables, and images. - -## Features - -- Convert single BoxNote files to DOCX -- Batch convert entire directories of BoxNote files -- Preserve formatting, tables, and images -- Support for Box API authentication -- Simple command-line interface - -## Installation - -```bash - -pip3 install . -``` - -## Usage - -### Basic Usage - -Convert a single file: -```bash -boxnotetodocx example.boxnote -``` - -Convert all files in a directory: -```bash -boxnotetodocx /path/to/directory -``` - -### Advanced Options - -```bash -boxnotetodocx --help - -Options: - -d, --dir TEXT Work directory for temporary files - -t, --token TEXT Box access token - -o, --output TEXT Output file name (only for single file conversion) - -u, --user TEXT Box user id - -v, --verbose Enable verbose logging - --help Show this message and exit -``` - -## Authentication - -To use Box API features (like image downloading), you need to provide a Box access token: - -```bash -boxnotetodocx input.boxnote -t "your_box_token" -u "your_user_id" -``` - -## Development - -See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and guidelines. - -## License - -This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. \ No newline at end of file diff --git a/boxtodocx/src/boxtodocx/handlers/Readme.md b/boxtodocx/src/boxtodocx/handlers/Readme.md deleted file mode 100644 index 240006d..0000000 --- a/boxtodocx/src/boxtodocx/handlers/Readme.md +++ /dev/null @@ -1,3 +0,0 @@ -# Handlers Directory - -Contains the core processing logic for HTML parsing and DOCX generation. diff --git a/boxtodocx/src/boxtodocx/handlers/__init__.py b/boxtodocx/src/boxtodocx/handlers/__init__.py deleted file mode 100644 index 62d843e..0000000 --- a/boxtodocx/src/boxtodocx/handlers/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Handlers package for BoxNote conversion.""" -from boxnotetodocx.handlers.html_handler import BoxNoteParser -from boxnotetodocx.handlers.docx_handler import HtmlToDocx - -__all__ = ['BoxNoteParser', 'HtmlToDocx'] \ No newline at end of file diff --git a/boxtodocx/src/boxtodocx/mappers/Readme.md b/boxtodocx/src/boxtodocx/mappers/Readme.md deleted file mode 100644 index f3b9525..0000000 --- a/boxtodocx/src/boxtodocx/mappers/Readme.md +++ /dev/null @@ -1,3 +0,0 @@ -# Mappers Directory - -Contains mapping definitions for HTML to DOCX conversion. diff --git a/boxtodocx/requirements-dev.txt b/requirements-dev.txt similarity index 100% rename from boxtodocx/requirements-dev.txt rename to requirements-dev.txt diff --git a/boxtodocx/requirements.txt b/requirements.txt similarity index 100% rename from boxtodocx/requirements.txt rename to requirements.txt diff --git a/boxtodocx/setup.py b/setup.py similarity index 78% rename from boxtodocx/setup.py rename to setup.py index b76d5af..f8e468a 100644 --- a/boxtodocx/setup.py +++ b/setup.py @@ -7,17 +7,17 @@ requirements = [line.strip() for line in fh if line.strip() and not line.startswith("#")] setup( - name="boxnotetodocx", + name="boxtodocx", version="1.0.0", author="Ujjwal Kumar", author_email="ujjwal.kumar1@ibm.com", - description="Convert Box Notes to Microsoft Word (DOCX) documents", + description="Convert Box Notes to Microsoft Word (docx) documents", long_description=long_description, long_description_content_type="text/markdown", - url="https://github.com/ujjwal-ibm/boxnoteconvertor", + url="https://github.com/ujjwal-ibm/boxtodocx", project_urls={ - "Bug Tracker": "https://github.com/ujjwal-ibm/boxnoteconvertor/issues", - "Documentation": "https://github.com/ujjwal-ibm/boxnoteconvertor", + "Bug Tracker": "https://github.com/ujjwal-ibm/boxtodocx/issues", + "Documentation": "https://github.com/ujjwal-ibm/boxtodocx", }, classifiers=[ "Development Status :: 5 - Production/Stable", @@ -38,7 +38,7 @@ install_requires=requirements, entry_points={ "console_scripts": [ - "boxnotetodocx=boxnotetodocx.cli:main", + "boxtodocx=boxtodocx.cli:main", ], }, include_package_data=True, diff --git a/boxtodocx/src/README.md b/src/README.md similarity index 83% rename from boxtodocx/src/README.md rename to src/README.md index 9e7648d..44c45e0 100644 --- a/boxtodocx/src/README.md +++ b/src/README.md @@ -1,3 +1,3 @@ # Source Code Structure -This directory contains the main package code for the BoxNote to DOCX converter. +This directory contains the main package code for the BoxNote to docx converter. diff --git a/boxtodocx/src/boxtodocx/__init__.py b/src/boxtodocx/__init__.py similarity index 67% rename from boxtodocx/src/boxtodocx/__init__.py rename to src/boxtodocx/__init__.py index adace61..a3010a5 100644 --- a/boxtodocx/src/boxtodocx/__init__.py +++ b/src/boxtodocx/__init__.py @@ -1,4 +1,4 @@ -"""BoxNote to DOCX converter package.""" +"""BoxNote to docx converter package.""" __version__ = "1.0.0" __author__ = "Ujjwal Kumar" diff --git a/boxtodocx/src/boxtodocx/__main__.py b/src/boxtodocx/__main__.py similarity index 58% rename from boxtodocx/src/boxtodocx/__main__.py rename to src/boxtodocx/__main__.py index e2b115b..84ef3a1 100644 --- a/boxtodocx/src/boxtodocx/__main__.py +++ b/src/boxtodocx/__main__.py @@ -1,7 +1,7 @@ -# src/boxnotetodocx/__main__.py +# src/boxtodocx/__main__.py """Entry point for running the package as a module.""" -from boxnotetodocx.cli import main +from boxtodocx.cli import main if __name__ == "__main__": main() \ No newline at end of file diff --git a/boxtodocx/src/boxtodocx/cli.py b/src/boxtodocx/cli.py similarity index 92% rename from boxtodocx/src/boxtodocx/cli.py rename to src/boxtodocx/cli.py index d2b276a..41bc6df 100644 --- a/boxtodocx/src/boxtodocx/cli.py +++ b/src/boxtodocx/cli.py @@ -4,8 +4,8 @@ import click -from boxnotetodocx.converter import BoxNoteConverter -from boxnotetodocx.utils.logger import setup_logger, get_logger +from boxtodocx.converter import BoxNoteConverter +from boxtodocx.utils.logger import setup_logger, get_logger logger = get_logger(__name__) @@ -20,7 +20,7 @@ def main(input_path: str, dir: Optional[str], token: Optional[str], output: Optional[str], user: Optional[str], verbose: bool) -> int: """ - Convert BoxNote files to DOCX format. + Convert BoxNote files to docx format. INPUT_PATH can be a single .boxnote file or a directory containing multiple .boxnote files. """ diff --git a/boxtodocx/src/boxtodocx/converter.py b/src/boxtodocx/converter.py similarity index 94% rename from boxtodocx/src/boxtodocx/converter.py rename to src/boxtodocx/converter.py index a6d03c9..57e9e8a 100644 --- a/boxtodocx/src/boxtodocx/converter.py +++ b/src/boxtodocx/converter.py @@ -1,18 +1,18 @@ import argparse from pathlib import Path -from boxnotetodocx.handlers.html_handler import BoxNoteParser -from boxnotetodocx.handlers.docx_handler import HtmlToDocx -from boxnotetodocx.utils.logger import get_logger +from boxtodocx.handlers.html_handler import BoxNoteParser +from boxtodocx.handlers.docx_handler import HtmlToDocx +from boxtodocx.utils.logger import get_logger import sys import traceback import uuid from typing import Optional -from boxnotetodocx.utils.logger import get_logger, setup_logger +from boxtodocx.utils.logger import get_logger, setup_logger logger = get_logger(__name__) class BoxNoteConverter: - """Main converter class for BoxNote to DOCX conversion.""" + """Main converter class for BoxNote to docx conversion.""" def __init__(self, workdir: Path, token: Optional[str] = None, user_id: Optional[str] = None): self.workdir = workdir @@ -75,8 +75,8 @@ def convert_single_file( with open(temp_html, 'w', encoding='utf-8') as f: f.write(html_content) - # Convert HTML to DOCX - logger.info(f"Converting to DOCX: {output_docx}") + # Convert HTML to docx + logger.info(f"Converting to docx: {output_docx}") docx_parser = HtmlToDocx(self.workdir) docx_parser.table_style = 'TableGrid' @@ -84,7 +84,7 @@ def convert_single_file( docx_parser.parse_html_file(str(temp_html), str(output_docx.with_suffix(''))) logger.info(f"Successfully created: {output_docx}") except Exception as e: - logger.error(f"DOCX conversion failed: {str(e)}") + logger.error(f"docx conversion failed: {str(e)}") raise except Exception as e: @@ -186,8 +186,8 @@ def convert_single_file( with open(temp_html, 'w', encoding='utf-8') as f: f.write(html_content) - # Convert HTML to DOCX - logger.info(f"Converting to DOCX: {output_docx}") + # Convert HTML to docx + logger.info(f"Converting to docx: {output_docx}") docx_parser = HtmlToDocx(workdir) docx_parser.table_style = 'TableGrid' @@ -195,7 +195,7 @@ def convert_single_file( docx_parser.parse_html_file(str(temp_html), str(output_docx.with_suffix(''))) # Remove .docx extension logger.info(f"Successfully created: {output_docx}") except Exception as e: - logger.error(f"DOCX conversion failed: {str(e)}") + logger.error(f"docx conversion failed: {str(e)}") logger.error(traceback.format_exc()) raise @@ -239,7 +239,7 @@ def convert_folder( logger.info(f"Failed: {failed}") def main(): - parser = argparse.ArgumentParser(description='Convert BoxNote files to DOCX format') + parser = argparse.ArgumentParser(description='Convert BoxNote files to docx format') parser.add_argument('input', help='Input file or directory path') parser.add_argument('-d', '--dir', help='Work directory for temporary files') parser.add_argument('-t', '--token', help='Box access token') diff --git a/src/boxtodocx/handlers/Readme.md b/src/boxtodocx/handlers/Readme.md new file mode 100644 index 0000000..16c6dfc --- /dev/null +++ b/src/boxtodocx/handlers/Readme.md @@ -0,0 +1,3 @@ +# Handlers Directory + +Contains the core processing logic for HTML parsing and docx generation. diff --git a/src/boxtodocx/handlers/__init__.py b/src/boxtodocx/handlers/__init__.py new file mode 100644 index 0000000..ce64b8f --- /dev/null +++ b/src/boxtodocx/handlers/__init__.py @@ -0,0 +1,5 @@ +"""Handlers package for BoxNote conversion.""" +from boxtodocx.handlers.html_handler import BoxNoteParser +from boxtodocx.handlers.docx_handler import HtmlToDocx + +__all__ = ['BoxNoteParser', 'HtmlToDocx'] \ No newline at end of file diff --git a/boxtodocx/src/boxtodocx/handlers/docx_handler.py b/src/boxtodocx/handlers/docx_handler.py similarity index 99% rename from boxtodocx/src/boxtodocx/handlers/docx_handler.py rename to src/boxtodocx/handlers/docx_handler.py index 8b0b701..7a430f3 100644 --- a/boxtodocx/src/boxtodocx/handlers/docx_handler.py +++ b/src/boxtodocx/handlers/docx_handler.py @@ -11,7 +11,7 @@ import os from typing import Dict, Any, Optional -from boxnotetodocx.utils.logger import get_logger +from boxtodocx.utils.logger import get_logger logger = get_logger(__name__) @@ -228,7 +228,7 @@ def handle_data(self, data): logger.error(f"Error handling data: {e}") def parse_html_file(self, input_file: str, output_file: str = None) -> None: - """Parse HTML file to DOCX with enhanced error handling""" + """Parse HTML file to docx with enhanced error handling""" try: # Read HTML file with open(input_file, 'r', encoding='utf-8') as infile: @@ -547,7 +547,7 @@ def handle_blockquote(self, content): logger.warning(f"Error handling blockquote: {e}") def parse_html_file(self, input_file: str, output_file: str = None): - """Parse HTML file to DOCX with enhanced error handling""" + """Parse HTML file to docx with enhanced error handling""" try: with open(input_file, 'r', encoding='utf-8') as infile: html = infile.read() diff --git a/boxtodocx/src/boxtodocx/handlers/html_handler.py b/src/boxtodocx/handlers/html_handler.py similarity index 98% rename from boxtodocx/src/boxtodocx/handlers/html_handler.py rename to src/boxtodocx/handlers/html_handler.py index 7d8caf0..1dce5d1 100644 --- a/boxtodocx/src/boxtodocx/handlers/html_handler.py +++ b/src/boxtodocx/handlers/html_handler.py @@ -1,10 +1,10 @@ import json from typing import Dict, List, Union, Optional, Any from pathlib import Path -from boxnotetodocx.mappers import html_mapper +from boxtodocx.mappers import html_mapper import re from bs4 import BeautifulSoup -from boxnotetodocx.utils.logger import get_logger +from boxtodocx.utils.logger import get_logger logger = get_logger(__name__) diff --git a/src/boxtodocx/mappers/Readme.md b/src/boxtodocx/mappers/Readme.md new file mode 100644 index 0000000..1e4adea --- /dev/null +++ b/src/boxtodocx/mappers/Readme.md @@ -0,0 +1,3 @@ +# Mappers Directory + +Contains mapping definitions for HTML to docx conversion. diff --git a/boxtodocx/src/boxtodocx/mappers/__init__.py b/src/boxtodocx/mappers/__init__.py similarity index 81% rename from boxtodocx/src/boxtodocx/mappers/__init__.py rename to src/boxtodocx/mappers/__init__.py index c03f74f..318b5b3 100644 --- a/boxtodocx/src/boxtodocx/mappers/__init__.py +++ b/src/boxtodocx/mappers/__init__.py @@ -1,5 +1,5 @@ """Mapper package for BoxNote conversion.""" -from boxnotetodocx.mappers.html_mapper import * +from boxtodocx.mappers.html_mapper import * __all__ = [ 'get_tag_open', diff --git a/boxtodocx/src/boxtodocx/mappers/html_mapper.py b/src/boxtodocx/mappers/html_mapper.py similarity index 99% rename from boxtodocx/src/boxtodocx/mappers/html_mapper.py rename to src/boxtodocx/mappers/html_mapper.py index ddff64e..6131fc6 100644 --- a/boxtodocx/src/boxtodocx/mappers/html_mapper.py +++ b/src/boxtodocx/mappers/html_mapper.py @@ -4,7 +4,7 @@ import requests import base64 from urllib.parse import unquote -from boxnotetodocx.utils.logger import get_logger +from boxtodocx.utils.logger import get_logger logger = get_logger(__name__) diff --git a/boxtodocx/src/boxtodocx/utils/Readme.md b/src/boxtodocx/utils/Readme.md similarity index 100% rename from boxtodocx/src/boxtodocx/utils/Readme.md rename to src/boxtodocx/utils/Readme.md diff --git a/boxtodocx/src/boxtodocx/utils/__init__.py b/src/boxtodocx/utils/__init__.py similarity index 100% rename from boxtodocx/src/boxtodocx/utils/__init__.py rename to src/boxtodocx/utils/__init__.py diff --git a/boxtodocx/src/boxtodocx/utils/logger.py b/src/boxtodocx/utils/logger.py similarity index 100% rename from boxtodocx/src/boxtodocx/utils/logger.py rename to src/boxtodocx/utils/logger.py diff --git a/boxtodocx/tests/README.md b/tests/README.md similarity index 100% rename from boxtodocx/tests/README.md rename to tests/README.md diff --git a/boxtodocx/tests/__init__.py b/tests/__init__.py similarity index 100% rename from boxtodocx/tests/__init__.py rename to tests/__init__.py From 0e29c1e5ce5896f5039685f68a98353ff0ad65a2 Mon Sep 17 00:00:00 2001 From: Ujjwal Kumar Date: Thu, 28 Nov 2024 16:11:21 +0530 Subject: [PATCH 2/2] changed all but 1 info logs to debug --- src/boxtodocx/cli.py | 4 +-- src/boxtodocx/converter.py | 44 ++++++++++++++-------------- src/boxtodocx/mappers/html_mapper.py | 2 +- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/boxtodocx/cli.py b/src/boxtodocx/cli.py index 41bc6df..a8b5bad 100644 --- a/src/boxtodocx/cli.py +++ b/src/boxtodocx/cli.py @@ -43,14 +43,14 @@ def main(input_path: str, dir: Optional[str], token: Optional[str], # Process files if input_path.is_dir(): - logger.info(f"Processing directory: {input_path}") + logger.debug(f"Processing directory: {input_path}") converter.convert_folder(input_path) else: if not input_path.suffix == '.boxnote': logger.error(f"Input file must be a .boxnote file: {input_path}") return 1 - logger.info(f"Processing file: {input_path}") + logger.debug(f"Processing file: {input_path}") converter.convert_single_file(input_path, output_path) return 0 diff --git a/src/boxtodocx/converter.py b/src/boxtodocx/converter.py index 57e9e8a..4f65c17 100644 --- a/src/boxtodocx/converter.py +++ b/src/boxtodocx/converter.py @@ -52,7 +52,7 @@ def convert_single_file( temp_html = self.get_temp_html_path() # Read and parse content - logger.info(f"Reading input file: {input_file}") + logger.debug(f"Reading input file: {input_file}") with open(input_file, 'r', encoding='utf-8') as f: content = f.read() @@ -66,17 +66,17 @@ def convert_single_file( output_docx = output_docx.parent / f'{output_stem}.docx' # Parse BoxNote to HTML - logger.info("Converting BoxNote to HTML") + logger.debug("Converting BoxNote to HTML") parser = BoxNoteParser() html_content = parser.parse(content, clean_name, self.workdir, self.token, self.user_id) # Write temporary HTML - logger.info(f"Writing temporary HTML: {temp_html}") + logger.debug(f"Writing temporary HTML: {temp_html}") with open(temp_html, 'w', encoding='utf-8') as f: f.write(html_content) # Convert HTML to docx - logger.info(f"Converting to docx: {output_docx}") + logger.debug(f"Converting to docx: {output_docx}") docx_parser = HtmlToDocx(self.workdir) docx_parser.table_style = 'TableGrid' @@ -103,25 +103,25 @@ def convert_folder(self, input_path: Path) -> None: successful = 0 failed = 0 - logger.info(f"Found {total_files} BoxNote files in {input_path}") + logger.debug(f"Found {total_files} BoxNote files in {input_path}") for boxnote_file in boxnote_files: try: - logger.info(f"Processing: {boxnote_file.name}") + logger.debug(f"Processing: {boxnote_file.name}") output_docx = None # Let convert_single_file handle the output path self.convert_single_file(boxnote_file, output_docx) successful += 1 - logger.info(f"Successfully converted: {boxnote_file.name}") + logger.debug(f"Successfully converted: {boxnote_file.name}") except Exception as e: failed += 1 logger.error(f"Failed to convert {boxnote_file.name}: {str(e)}") continue # Print summary - logger.info("\nConversion Summary:") - logger.info(f"Total files: {total_files}") - logger.info(f"Successfully converted: {successful}") - logger.info(f"Failed: {failed}") + logger.debug("\nConversion Summary:") + logger.debug(f"Total files: {total_files}") + logger.debug(f"Successfully converted: {successful}") + logger.debug(f"Failed: {failed}") @@ -163,7 +163,7 @@ def convert_single_file( temp_html = self.get_temp_html_path(workdir) # Read and parse content - logger.info(f"Reading input file: {input_file}") + logger.debug(f"Reading input file: {input_file}") with open(input_file, 'r', encoding='utf-8') as f: content = f.read() @@ -178,16 +178,16 @@ def convert_single_file( # Parse BoxNote to HTML from html_parser import parse - logger.info("Converting BoxNote to HTML") + logger.debug("Converting BoxNote to HTML") html_content = parse(content, clean_name, workdir, token, user_id) # Write temporary HTML - logger.info(f"Writing temporary HTML: {temp_html}") + logger.debug(f"Writing temporary HTML: {temp_html}") with open(temp_html, 'w', encoding='utf-8') as f: f.write(html_content) # Convert HTML to docx - logger.info(f"Converting to docx: {output_docx}") + logger.debug(f"Converting to docx: {output_docx}") docx_parser = HtmlToDocx(workdir) docx_parser.table_style = 'TableGrid' @@ -218,25 +218,25 @@ def convert_folder( successful = 0 failed = 0 - logger.info(f"Found {total_files} BoxNote files in {input_path}") + logger.debug(f"Found {total_files} BoxNote files in {input_path}") for boxnote_file in boxnote_files: try: - logger.info(f"Processing: {boxnote_file.name}") + logger.debug(f"Processing: {boxnote_file.name}") output_docx = None # Let convert_single_file handle the output path self.convert_single_file(token, workdir, boxnote_file, output_docx, user_id) successful += 1 - logger.info(f"Successfully converted: {boxnote_file.name}") + logger.debug(f"Successfully converted: {boxnote_file.name}") except Exception as e: failed += 1 logger.error(f"Failed to convert {boxnote_file.name}: {str(e)}") continue # Print summary - logger.info("\nConversion Summary:") - logger.info(f"Total files: {total_files}") - logger.info(f"Successfully converted: {successful}") - logger.info(f"Failed: {failed}") + logger.debug("\nConversion Summary:") + logger.debug(f"Total files: {total_files}") + logger.debug(f"Successfully converted: {successful}") + logger.debug(f"Failed: {failed}") def main(): parser = argparse.ArgumentParser(description='Convert BoxNote files to docx format') diff --git a/src/boxtodocx/mappers/html_mapper.py b/src/boxtodocx/mappers/html_mapper.py index 6131fc6..afd5625 100644 --- a/src/boxtodocx/mappers/html_mapper.py +++ b/src/boxtodocx/mappers/html_mapper.py @@ -358,7 +358,7 @@ def download_image(box_file_id: str, file_name: str, workdir: Path, token: str, with open(file_path, 'wb') as f: f.write(response.content) - logger.info(f'Successfully downloaded image: {file_name}') + logger.debug(f'Successfully downloaded image: {file_name}') return file_path except requests.exceptions.RequestException as e: