diff --git a/.dockerignore b/.dockerignore index 7ce61c4f..db02b76e 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,203 +1,203 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Data folder -data/ -data.zip -static/ -enrichment_analysis/data/ - -# History files -.Rhistory -.Rapp.history - -# Session Data files -.RData -.RDataTmp - -# User-specific files -.Ruserdata - -# Example code in package build process -*-Ex.R - -# Output files from R CMD build -/*.tar.gz - -# Output files from R CMD check -/*.Rcheck/ - -# RStudio files -.Rproj.user/ - -# produced vignettes -vignettes/*.html -vignettes/*.pdf - -# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 -.httr-oauth - -# knitr and R markdown default cache directories -*_cache/ -/cache/ - -# Temporary files created by R markdown -*.utf8.md -*.knit.md - -# R Environment Variables -.Renviron - -# pkgdown site -docs/ - -# translation temp files -po/*~ - -# RStudio Connect folder -rsconnect/ - -# Scratch -scratch/ -scratch.ipynb -scratch.py -*dummy* - -# Scripts for bulk processing -*-bulk.* -*-bulk*.* - -# Binary files -prepare_data/**/LazyFox -*.jar - -.github/ -.vscode/ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Data folder +data/ +data.zip +static/ +enrichment_analysis/data/ + +# History files +.Rhistory +.Rapp.history + +# Session Data files +.RData +.RDataTmp + +# User-specific files +.Ruserdata + +# Example code in package build process +*-Ex.R + +# Output files from R CMD build +/*.tar.gz + +# Output files from R CMD check +/*.Rcheck/ + +# RStudio files +.Rproj.user/ + +# produced vignettes +vignettes/*.html +vignettes/*.pdf + +# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 +.httr-oauth + +# knitr and R markdown default cache directories +*_cache/ +/cache/ + +# Temporary files created by R markdown +*.utf8.md +*.knit.md + +# R Environment Variables +.Renviron + +# pkgdown site +docs/ + +# translation temp files +po/*~ + +# RStudio Connect folder +rsconnect/ + +# Scratch +scratch/ +scratch.ipynb +scratch.py +*dummy* + +# Scripts for bulk processing +*-bulk.* +*-bulk*.* + +# Binary files +prepare_data/**/LazyFox +*.jar + +.github/ +.vscode/ Dockerfile* \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 651c92c5..5c9795c0 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,32 +1,32 @@ ---- -name: Bug report -about: Create a report to help us improve -title: '' -labels: 'bug' -assignees: '' - ---- - -**Describe the bug** -A clear and concise description of what the bug is. - -**To reproduce** -Steps to reproduce the behavior: -1. Go to '...' -2. Click on '...' -3. Scroll down to '...' -4. See error - -**Expected behavior** -A clear and concise description of what you expected to happen. - -**Screenshots** -If applicable, add screenshots to help explain your problem. - -**Platform** - - OS (Please indicate the version as well): [e.g., Windows 10, Ubuntu Jammy Jellyfish] - - Browser: [e.g., Chrome, Safari] - - Browser version: [e.g., 114.0.5735.199. If unspecified, the latest browser version will be assumed] - -**Additional context** -Add any other context about the problem here. +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: 'bug' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '...' +3. Scroll down to '...' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Platform** + - OS (Please indicate the version as well): [e.g., Windows 10, Ubuntu Jammy Jellyfish] + - Browser: [e.g., Chrome, Safari] + - Browser version: [e.g., 114.0.5735.199. If unspecified, the latest browser version will be assumed] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 36014cde..a1e43a6c 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,20 +1,20 @@ ---- -name: Feature request -about: Suggest an idea for this project -title: '' -labels: 'enhancement' -assignees: '' - ---- - -**Is your feature request related to a problem? Please describe.** -A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] - -**Describe the solution you'd like** -A clear and concise description of what you want to happen. - -**Describe alternatives you've considered** -A clear and concise description of any alternative solutions or features you've considered. - -**Additional context** -Add any other context or screenshots about the feature request here. +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: 'enhancement' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/workflows/check-for-syntax-errors.yml b/.github/workflows/check-for-syntax-errors.yml index 300b7b17..7e39332b 100644 --- a/.github/workflows/check-for-syntax-errors.yml +++ b/.github/workflows/check-for-syntax-errors.yml @@ -1,8 +1,8 @@ -on: [push, pull_request] -name: Check for syntax errors -jobs: - check-for-syntax-errors: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: cclauss/Find-Python-syntax-errors-action@master +on: [push, pull_request] +name: Check for syntax errors +jobs: + check-for-syntax-errors: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: cclauss/Find-Python-syntax-errors-action@master diff --git a/.github/workflows/delete-package-versions.yml b/.github/workflows/delete-package-versions.yml index 9762a1a3..280c72d5 100644 --- a/.github/workflows/delete-package-versions.yml +++ b/.github/workflows/delete-package-versions.yml @@ -1,30 +1,30 @@ -name: Delete package versions - -on: - workflow_run: - workflows: [Create and publish a Docker image] - types: [completed] - branches: [main] - -jobs: - delete-versions: - runs-on: ubuntu-latest - if: ${{ github.event.workflow_run.conclusion == 'success' }} - - steps: - - uses: actions/delete-package-versions@v4 - with: - package-name: 'rice-pilaf/app' - package-type: 'container' - min-versions-to-keep: 1 - - uses: actions/delete-package-versions@v4 - with: - package-name: 'rice-pilaf/workflow' - package-type: 'container' - min-versions-to-keep: 1 - - on-failure: - runs-on: ubuntu-latest - if: ${{ github.event.workflow_run.conclusion == 'failure' }} - steps: - - run: echo 'The triggering workflow failed' +name: Delete package versions + +on: + workflow_run: + workflows: [Create and publish a Docker image] + types: [completed] + branches: [main] + +jobs: + delete-versions: + runs-on: ubuntu-latest + if: ${{ github.event.workflow_run.conclusion == 'success' }} + + steps: + - uses: actions/delete-package-versions@v4 + with: + package-name: 'rice-pilaf/app' + package-type: 'container' + min-versions-to-keep: 1 + - uses: actions/delete-package-versions@v4 + with: + package-name: 'rice-pilaf/workflow' + package-type: 'container' + min-versions-to-keep: 1 + + on-failure: + runs-on: ubuntu-latest + if: ${{ github.event.workflow_run.conclusion == 'failure' }} + steps: + - run: echo 'The triggering workflow failed' diff --git a/.github/workflows/dockerize-and-publish.yml b/.github/workflows/dockerize-and-publish.yml index b8d2cba8..24b356f1 100644 --- a/.github/workflows/dockerize-and-publish.yml +++ b/.github/workflows/dockerize-and-publish.yml @@ -1,67 +1,67 @@ -name: Create and publish a Docker image - -on: - workflow_run: - workflows: [Check for syntax errors] - types: [completed] - branches: [main] - -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -jobs: - build-and-push-image: - runs-on: ubuntu-latest - if: ${{ github.event.workflow_run.conclusion == 'success' }} - strategy: - fail-fast: false - matrix: # Cannot use env variables inside matrix - include: - - dockerfile: Dockerfile-app # Change to Dockerfile of app - image: ghcr.io/bioinfodlsu/rice-pilaf/app - - dockerfile: Dockerfile-workflow # Change to Dockerfile of workflow - image: ghcr.io/bioinfodlsu/rice-pilaf/workflow - permissions: - contents: read - packages: write - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@v4 - with: - images: ${{ matrix.image }} - - - name: Set up QEMU - uses: docker/setup-qemu-action@v2 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Log in to the Container registry - uses: docker/login-action@v2 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push Docker image - uses: docker/build-push-action@v3 - with: - context: . - cache-from: type=gha - cache-to: type=gha, mode=max - file: ${{ matrix.dockerfile }} - push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - - on-failure: - runs-on: ubuntu-latest - if: ${{ github.event.workflow_run.conclusion == 'failure' }} - steps: - - run: echo 'The triggering workflow failed' +name: Create and publish a Docker image + +on: + workflow_run: + workflows: [Check for syntax errors] + types: [completed] + branches: [main] + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build-and-push-image: + runs-on: ubuntu-latest + if: ${{ github.event.workflow_run.conclusion == 'success' }} + strategy: + fail-fast: false + matrix: # Cannot use env variables inside matrix + include: + - dockerfile: Dockerfile-app # Change to Dockerfile of app + image: ghcr.io/bioinfodlsu/rice-pilaf/app + - dockerfile: Dockerfile-workflow # Change to Dockerfile of workflow + image: ghcr.io/bioinfodlsu/rice-pilaf/workflow + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v4 + with: + images: ${{ matrix.image }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Log in to the Container registry + uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push Docker image + uses: docker/build-push-action@v3 + with: + context: . + cache-from: type=gha + cache-to: type=gha, mode=max + file: ${{ matrix.dockerfile }} + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + on-failure: + runs-on: ubuntu-latest + if: ${{ github.event.workflow_run.conclusion == 'failure' }} + steps: + - run: echo 'The triggering workflow failed' diff --git a/.github/workflows/mirror-main-to-demo.yml b/.github/workflows/mirror-main-to-demo.yml index d9613f9c..ec0d0586 100644 --- a/.github/workflows/mirror-main-to-demo.yml +++ b/.github/workflows/mirror-main-to-demo.yml @@ -1,20 +1,20 @@ -name: Mirror main to demo branch - -on: - workflow_run: - workflows: [Check for syntax errors] - types: [completed] - branches: [main] - -jobs: - mirror_job: - runs-on: ubuntu-latest - name: Mirror main branch to demo branch - steps: - - name: Mirror action step - id: mirror - uses: google/mirror-branch-action@v2.0 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - source: 'main' - dest: 'demo' +name: Mirror main to demo branch + +on: + workflow_run: + workflows: [Check for syntax errors] + types: [completed] + branches: [main] + +jobs: + mirror_job: + runs-on: ubuntu-latest + name: Mirror main branch to demo branch + steps: + - name: Mirror action step + id: mirror + uses: google/mirror-branch-action@v2.0 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + source: 'main' + dest: 'demo' diff --git a/.gitignore b/.gitignore index 33546701..810f6ec2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,201 +1,201 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Data folder -data/ -data.zip -static/ -enrichment_analysis/data/ - -# History files -.Rhistory -.Rapp.history - -# Session Data files -.RData -.RDataTmp - -# User-specific files -.Ruserdata - -# Example code in package build process -*-Ex.R - -# Output files from R CMD build -/*.tar.gz - -# Output files from R CMD check -/*.Rcheck/ - -# RStudio files -.Rproj.user/ - -# produced vignettes -vignettes/*.html -vignettes/*.pdf - -# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 -.httr-oauth - -# knitr and R markdown default cache directories -*_cache/ -/cache/ - -# Temporary files created by R markdown -*.utf8.md -*.knit.md - -# R Environment Variables -.Renviron - -# pkgdown site -docs/ - -# translation temp files -po/*~ - -# RStudio Connect folder -rsconnect/ - -# Scratch -scratch/ -scratch.ipynb -scratch.py -*dummy* -generic-enrichment.r -app-1.py - -# Scripts for bulk processing -*-bulk.* -*-bulk*.* - -# Binary files -prepare_data/**/LazyFox +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Data folder +data/ +data.zip +static/ +enrichment_analysis/data/ + +# History files +.Rhistory +.Rapp.history + +# Session Data files +.RData +.RDataTmp + +# User-specific files +.Ruserdata + +# Example code in package build process +*-Ex.R + +# Output files from R CMD build +/*.tar.gz + +# Output files from R CMD check +/*.Rcheck/ + +# RStudio files +.Rproj.user/ + +# produced vignettes +vignettes/*.html +vignettes/*.pdf + +# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 +.httr-oauth + +# knitr and R markdown default cache directories +*_cache/ +/cache/ + +# Temporary files created by R markdown +*.utf8.md +*.knit.md + +# R Environment Variables +.Renviron + +# pkgdown site +docs/ + +# translation temp files +po/*~ + +# RStudio Connect folder +rsconnect/ + +# Scratch +scratch/ +scratch.ipynb +scratch.py +*dummy* +generic-enrichment.r +app-1.py + +# Scripts for bulk processing +*-bulk.* +*-bulk*.* + +# Binary files +prepare_data/**/LazyFox *.jar \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index cfaa0770..a8054ea2 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,23 +1,23 @@ -{ - "[python]": { - "editor.defaultFormatter": "ms-python.autopep8", - "editor.formatOnSave": true - }, - "[r]": { - "editor.defaultFormatter": "REditorSupport.r", - "editor.formatOnSave": true - }, - "python.formatting.provider": "none", - "editor.codeActionsOnSave": { - "source.organizeImports": true - }, - "saveAndRunExt": { - "commands": [ - { - "match": ".*\\.py", - "isShellCommand": true, - "cmd": "autoflake -i --remove-all-unused-imports --remove-unused-variables ${file}" - } - ] - } -} +{ + "[python]": { + "editor.defaultFormatter": "ms-python.autopep8", + "editor.formatOnSave": true + }, + "[r]": { + "editor.defaultFormatter": "REditorSupport.r", + "editor.formatOnSave": true + }, + "python.formatting.provider": "none", + "editor.codeActionsOnSave": { + "source.organizeImports": true + }, + "saveAndRunExt": { + "commands": [ + { + "match": ".*\\.py", + "isShellCommand": true, + "cmd": "autoflake -i --remove-all-unused-imports --remove-unused-variables ${file}" + } + ] + } +} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 27b57816..3e39ebc0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,25 +1,25 @@ -Welcome! -## Table of Contents -## Reporting Bugs -## Requesting Features -## Contributing Workflow -1. Read wiki -2. Fork repository and clone -3. Set up the environment - - OS - - Manual Setup - - Docker: https://learn.microsoft.com/en-us/training/modules/use-docker-container-dev-env-vs-code/ -5. Implement the feature - - Refer to the wiki - - Note on code formatting and linting -6. Dockerize - - Refer to the wiki -7. Submit pull request - - Pull requests with merge conflicts - - Have to pass the check for syntax errors -9. Update documentation - - Wiki documentation - -## License -## Code of Conduct -## Contact +Welcome! +## Table of Contents +## Reporting Bugs +## Requesting Features +## Contributing Workflow +1. Read wiki +2. Fork repository and clone +3. Set up the environment + - OS + - Manual Setup + - Docker: https://learn.microsoft.com/en-us/training/modules/use-docker-container-dev-env-vs-code/ +5. Implement the feature + - Refer to the wiki + - Note on code formatting and linting +6. Dockerize + - Refer to the wiki +7. Submit pull request + - Pull requests with merge conflicts + - Have to pass the check for syntax errors +9. Update documentation + - Wiki documentation + +## License +## Code of Conduct +## Contact diff --git a/Dockerfile-app b/Dockerfile-app index d079ab8d..b26e43a3 100644 --- a/Dockerfile-app +++ b/Dockerfile-app @@ -1,25 +1,25 @@ -FROM tiangolo/uwsgi-nginx-flask:python3.10 - -COPY . /app -WORKDIR /app - -RUN set -ex - -RUN echo 'deb [trusted=yes] http://cloud.r-project.org/bin/linux/debian bullseye-cran40/' >> /etc/apt/sources.list - -RUN apt-get update \ - && apt-get install -y \ - git \ - python3-dev \ - python3-pip \ - && apt-get clean - -RUN pip3 install --no-cache-dir -r dependencies/requirements-app.txt - -# Install mcdp2 -RUN cd ../ \ - && git clone https://github.com/fmfi-compbio/mcdp2 \ - && cd mcdp2 \ - && git reset --hard fd7c69f5e97db8c1052df859cb02d86533287e64 \ - && pip3 install . \ - && cd ../app +FROM tiangolo/uwsgi-nginx-flask:python3.10 + +COPY . /app +WORKDIR /app + +RUN set -ex + +RUN echo 'deb [trusted=yes] http://cloud.r-project.org/bin/linux/debian bullseye-cran40/' >> /etc/apt/sources.list + +RUN apt-get update \ + && apt-get install -y \ + git \ + python3-dev \ + python3-pip \ + && apt-get clean + +RUN pip3 install --no-cache-dir -r dependencies/requirements-app.txt + +# Install mcdp2 +RUN cd ../ \ + && git clone https://github.com/fmfi-compbio/mcdp2 \ + && cd mcdp2 \ + && git reset --hard fd7c69f5e97db8c1052df859cb02d86533287e64 \ + && pip3 install . \ + && cd ../app diff --git a/Dockerfile-workflow b/Dockerfile-workflow index ef6e21b2..88f3ed99 100644 --- a/Dockerfile-workflow +++ b/Dockerfile-workflow @@ -1,34 +1,34 @@ -FROM tiangolo/uwsgi-nginx-flask:python3.10 - -COPY . /app -WORKDIR /app - -RUN set -ex - -RUN echo 'deb [trusted=yes] http://cloud.r-project.org/bin/linux/debian bullseye-cran40/' >> /etc/apt/sources.list - -RUN apt-get clean \ - && apt-get update \ - && apt-get install -y \ - build-essential \ - git \ - libcurl4-openssl-dev \ - libffi-dev \ - libfontconfig1-dev \ - libssl-dev \ - libxml2-dev \ - python3-dev \ - python3-pip \ - r-base - -RUN pip3 install --no-cache-dir -r dependencies/requirements-workflow.txt - -# Install mcdp2 -RUN cd ../ \ - && git clone https://github.com/fmfi-compbio/mcdp2 \ - && cd mcdp2 \ - && git reset --hard fd7c69f5e97db8c1052df859cb02d86533287e64 \ - && pip3 install . \ - && cd ../app - -RUN Rscript --vanilla dependencies/install-libraries-workflow.r +FROM tiangolo/uwsgi-nginx-flask:python3.10 + +COPY . /app +WORKDIR /app + +RUN set -ex + +RUN echo 'deb [trusted=yes] http://cloud.r-project.org/bin/linux/debian bullseye-cran40/' >> /etc/apt/sources.list + +RUN apt-get clean \ + && apt-get update \ + && apt-get install -y \ + build-essential \ + git \ + libcurl4-openssl-dev \ + libffi-dev \ + libfontconfig1-dev \ + libssl-dev \ + libxml2-dev \ + python3-dev \ + python3-pip \ + r-base + +RUN pip3 install --no-cache-dir -r dependencies/requirements-workflow.txt + +# Install mcdp2 +RUN cd ../ \ + && git clone https://github.com/fmfi-compbio/mcdp2 \ + && cd mcdp2 \ + && git reset --hard fd7c69f5e97db8c1052df859cb02d86533287e64 \ + && pip3 install . \ + && cd ../app + +RUN Rscript --vanilla dependencies/install-libraries-workflow.r diff --git a/LICENSE b/LICENSE index 0afae2e4..dd58ad3c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,21 @@ -MIT License - -Copyright (c) 2023 Bioinformatics Lab, De La Salle University Manila - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +MIT License + +Copyright (c) 2023 Bioinformatics Lab, De La Salle University Manila + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index ac652051..a76b7575 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ -# RicePilaf -Welcome to RicePilaf, a post-GWAS/QTL analysis dashboard for rice genomes. - -## Installation and usage -Please visit the RicePilaf [wiki](https://github.com/bioinfodlsu/rice-pilaf/wiki). - -## Demo -A demo version can be seen [here](http://ricepilaf.bioinfodlsu.com/). - -## Contact -If you have issues, concerns, questions, please contact: Anish Shrestha (anish.shrestha --atmark-- dlsu.edu.ph) +# RicePilaf +Welcome to RicePilaf, a post-GWAS/QTL analysis dashboard for rice genomes. + +## Installation and usage +Please visit the RicePilaf [wiki](https://github.com/bioinfodlsu/rice-pilaf/wiki). + +## Demo +A demo version can be seen [here](http://ricepilaf.bioinfodlsu.com/). + +## Contact +If you have issues, concerns, questions, please contact: Anish Shrestha (anish.shrestha --atmark-- dlsu.edu.ph) diff --git a/app.py b/app.py index 93564ec3..c1842bc4 100644 --- a/app.py +++ b/app.py @@ -1,282 +1,282 @@ -import dash -import dash_bootstrap_components as dbc -from dash import dcc, html -import sqlite3 - -import pages.navigation.main_nav as main_nav - -import callbacks.homepage.callbacks -import callbacks.homepage.util -import callbacks.lift_over.callbacks -import callbacks.browse_loci.callbacks -import callbacks.coexpression.callbacks -import callbacks.tf_enrich.callbacks -import callbacks.text_mining.callbacks - -from callbacks.branch import * -from callbacks.constants import * -from callbacks.file_util import * - -from flask import Flask - -server = Flask(__name__, static_folder='static') -app = dash.Dash(__name__, use_pages=True, - external_stylesheets=[dbc.themes.BOOTSTRAP, - dbc.icons.BOOTSTRAP, dbc.icons.FONT_AWESOME], - server=server, - title='RicePilaf', - update_title='Loading...') - -welcome = dcc.Markdown( - ''' - Welcome ! Rice Pilaf is short for Rice Post-GWAS/QTL Dashboard. - Ok, we are not good at abbreviations, but like a good pilaf, this dashboard combines many ingredients. - With this tool, you can do amazing things like ... (write me) - ''' -) - - -# ============ -# Main Layout -# ============ - - -app.layout = lambda: dbc.Container([ - dbc.Row( - html.Div( - children=[ - html.P([ - 'This is a demo version. Click ', - dcc.Link( - ['here ', html.I( - id='demo-link', - className='fa-solid fa-up-right-from-square fa-2xs' - )], - href='https://github.com/bioinfodlsu/rice-pilaf/wiki/1.-Installation', - target='_blank', - className='top-navbar-item' - ), - ' to install.'], className='my-auto' - ) - ], - className='banner d-flex justify-content-center py-1 text-white', - id='demo-banner' - ), - style=show_if_in_demo_branch() - ), - - dbc.Row(main_nav.navbar()), - - dash.page_container, - - # Session storage - html.Div( - id='session-container', - children=[ - # ========= - # Homepage - # ========= - dcc.Store( - id='homepage-is-submitted', - storage_type='session' - ), - - dcc.Store( - id='homepage-genomic-intervals-saved-input', - storage_type='session' - ), - - dcc.Store( - id='homepage-genomic-intervals-submitted-input', - storage_type='session' - ), - - dcc.Store( - id='current-analysis-page-nav', - storage_type='session' - ), - - - - # ========== - # Lift-over - # ========== - dcc.Store( - id='lift-over-is-submitted', - storage_type='session' - ), - - dcc.Store( - id='lift-over-active-tab', - storage_type='session' - ), - - dcc.Store( - id='lift-over-other-refs-saved-input', - storage_type='session' - ), - - dcc.Store( - id='lift-over-other-refs-submitted-input', - storage_type='session' - ), - - dcc.Store( - id='lift-over-active-filter', - storage_type='session' - ), - - dcc.Store( - id='lift-over-nb-table', - storage_type='session' - ), - - dcc.Store( - id='lift-over-nb-entire-table', - storage_type='session' - ), - - # ============ - # IGV Browser - # ============ - dcc.Store( - id='igv-selected-genomic-intervals-saved-input', - storage_type='session' - ), - - dcc.Store( - id='igv-selected-genomic-intervals-submitted-input', - storage_type='session' - ), - - dcc.Store( - id='igv-selected-tracks-submitted-input', - storage_type='session' - ), - - dcc.Store( - id='igv-is-submitted', - storage_type='session' - ), - - # ============== - # Co-expression - # ============== - dcc.Store( - id='coexpression-addl-genes-saved-input', - storage_type='session' - ), - - dcc.Store( - id='coexpression-submitted-addl-genes', - storage_type='session' - ), - - dcc.Store( - id='coexpression-combined-genes', - storage_type='session' - ), - - dcc.Store( - id='coexpression-network-saved-input', - storage_type='session' - ), - - dcc.Store( - id='coexpression-submitted-network', - storage_type='session' - ), - - dcc.Store( - id='coexpression-clustering-algo-saved-input', - storage_type='session' - ), - - dcc.Store( - id='coexpression-submitted-clustering-algo', - storage_type='session' - ), - - dcc.Store( - id='coexpression-submitted-parameter-module', - storage_type='session' - ), - - dcc.Store( - id='coexpression-parameter-module-saved-input', - storage_type='session' - ), - - dcc.Store( - id='coexpression-is-submitted', - storage_type='session' - ), - - # ============================== - # Regulatory Feature Enrichment - # ============================== - - dcc.Store( - id='tfbs-saved-input', - storage_type='session' - ), - - dcc.Store( - id='tfbs-submitted-input', - storage_type='session' - ), - - dcc.Store( - id='tfbs-is-submitted', - storage_type='session' - ), - - # ============ - # Text Mining - # ============ - - dcc.Store( - id='text-mining-query-saved-input', - storage_type='session' - ), - - dcc.Store( - id='text-mining-query-submitted-input', - storage_type='session' - ), - - dcc.Store( - id='text-mining-is-submitted', - storage_type='session' - ), - ]) -], fluid=True, className='pb-4') - -callbacks.homepage.callbacks.init_callback(app) - -callbacks.lift_over.callbacks.init_callback(app) -callbacks.browse_loci.callbacks.init_callback(app) -callbacks.coexpression.callbacks.init_callback(app) -callbacks.tf_enrich.callbacks.init_callback(app) -callbacks.text_mining.callbacks.init_callback(app) - -# Create database table -const = Constants() -make_dir(const.TEMP) - -try: - connection = sqlite3.connect(const.FILE_STATUS_DB) - cursor = connection.cursor() - - query = f'CREATE TABLE IF NOT EXISTS {const.FILE_STATUS_TABLE} (name TEXT, UNIQUE(name));' - - cursor.execute(query) - connection.commit() - - cursor.close() - connection.close() -except sqlite3.Error as error: - pass - -if __name__ == '__main__': - app.run_server(port='8050', debug=True) +import dash +import dash_bootstrap_components as dbc +from dash import dcc, html +import sqlite3 + +import pages.navigation.main_nav as main_nav + +import callbacks.homepage.callbacks +import callbacks.homepage.util +import callbacks.lift_over.callbacks +import callbacks.browse_loci.callbacks +import callbacks.coexpression.callbacks +import callbacks.tf_enrich.callbacks +import callbacks.text_mining.callbacks + +from callbacks.branch import * +from callbacks.constants import * +from callbacks.file_util import * + +from flask import Flask + +server = Flask(__name__, static_folder='static') +app = dash.Dash(__name__, use_pages=True, + external_stylesheets=[dbc.themes.BOOTSTRAP, + dbc.icons.BOOTSTRAP, dbc.icons.FONT_AWESOME], + server=server, + title='RicePilaf', + update_title='Loading...') + +welcome = dcc.Markdown( + ''' + Welcome ! Rice Pilaf is short for Rice Post-GWAS/QTL Dashboard. + Ok, we are not good at abbreviations, but like a good pilaf, this dashboard combines many ingredients. + With this tool, you can do amazing things like ... (write me) + ''' +) + + +# ============ +# Main Layout +# ============ + + +app.layout = lambda: dbc.Container([ + dbc.Row( + html.Div( + children=[ + html.P([ + 'This is a demo version. Click ', + dcc.Link( + ['here ', html.I( + id='demo-link', + className='fa-solid fa-up-right-from-square fa-2xs' + )], + href='https://github.com/bioinfodlsu/rice-pilaf/wiki/1.-Installation', + target='_blank', + className='top-navbar-item' + ), + ' to install.'], className='my-auto' + ) + ], + className='banner d-flex justify-content-center py-1 text-white', + id='demo-banner' + ), + style=show_if_in_demo_branch() + ), + + dbc.Row(main_nav.navbar()), + + dash.page_container, + + # Session storage + html.Div( + id='session-container', + children=[ + # ========= + # Homepage + # ========= + dcc.Store( + id='homepage-is-submitted', + storage_type='session' + ), + + dcc.Store( + id='homepage-genomic-intervals-saved-input', + storage_type='session' + ), + + dcc.Store( + id='homepage-genomic-intervals-submitted-input', + storage_type='session' + ), + + dcc.Store( + id='current-analysis-page-nav', + storage_type='session' + ), + + + + # ========== + # Lift-over + # ========== + dcc.Store( + id='lift-over-is-submitted', + storage_type='session' + ), + + dcc.Store( + id='lift-over-active-tab', + storage_type='session' + ), + + dcc.Store( + id='lift-over-other-refs-saved-input', + storage_type='session' + ), + + dcc.Store( + id='lift-over-other-refs-submitted-input', + storage_type='session' + ), + + dcc.Store( + id='lift-over-active-filter', + storage_type='session' + ), + + dcc.Store( + id='lift-over-nb-table', + storage_type='session' + ), + + dcc.Store( + id='lift-over-nb-entire-table', + storage_type='session' + ), + + # ============ + # IGV Browser + # ============ + dcc.Store( + id='igv-selected-genomic-intervals-saved-input', + storage_type='session' + ), + + dcc.Store( + id='igv-selected-genomic-intervals-submitted-input', + storage_type='session' + ), + + dcc.Store( + id='igv-selected-tracks-submitted-input', + storage_type='session' + ), + + dcc.Store( + id='igv-is-submitted', + storage_type='session' + ), + + # ============== + # Co-expression + # ============== + dcc.Store( + id='coexpression-addl-genes-saved-input', + storage_type='session' + ), + + dcc.Store( + id='coexpression-submitted-addl-genes', + storage_type='session' + ), + + dcc.Store( + id='coexpression-combined-genes', + storage_type='session' + ), + + dcc.Store( + id='coexpression-network-saved-input', + storage_type='session' + ), + + dcc.Store( + id='coexpression-submitted-network', + storage_type='session' + ), + + dcc.Store( + id='coexpression-clustering-algo-saved-input', + storage_type='session' + ), + + dcc.Store( + id='coexpression-submitted-clustering-algo', + storage_type='session' + ), + + dcc.Store( + id='coexpression-submitted-parameter-module', + storage_type='session' + ), + + dcc.Store( + id='coexpression-parameter-module-saved-input', + storage_type='session' + ), + + dcc.Store( + id='coexpression-is-submitted', + storage_type='session' + ), + + # ============================== + # Regulatory Feature Enrichment + # ============================== + + dcc.Store( + id='tfbs-saved-input', + storage_type='session' + ), + + dcc.Store( + id='tfbs-submitted-input', + storage_type='session' + ), + + dcc.Store( + id='tfbs-is-submitted', + storage_type='session' + ), + + # ============ + # Text Mining + # ============ + + dcc.Store( + id='text-mining-query-saved-input', + storage_type='session' + ), + + dcc.Store( + id='text-mining-query-submitted-input', + storage_type='session', + ), + + dcc.Store( + id='text-mining-is-submitted', + storage_type='session' + ), + ]) +], fluid=True, className='pb-4') + +callbacks.homepage.callbacks.init_callback(app) + +callbacks.lift_over.callbacks.init_callback(app) +callbacks.browse_loci.callbacks.init_callback(app) +callbacks.coexpression.callbacks.init_callback(app) +callbacks.tf_enrich.callbacks.init_callback(app) +callbacks.text_mining.callbacks.init_callback(app) + +# Create database table +const = Constants() +make_dir(const.TEMP) + +try: + connection = sqlite3.connect(const.FILE_STATUS_DB) + cursor = connection.cursor() + + query = f'CREATE TABLE IF NOT EXISTS {const.FILE_STATUS_TABLE} (name TEXT, UNIQUE(name));' + + cursor.execute(query) + connection.commit() + + cursor.close() + connection.close() +except sqlite3.Error as error: + pass + +if __name__ == '__main__': + app.run_server(port='8050', debug=True) diff --git a/assets/1-global.css b/assets/1-global.css index 440aa34c..6ef584a9 100644 --- a/assets/1-global.css +++ b/assets/1-global.css @@ -1,67 +1,75 @@ -:root { - --green-top: #254b5d; - --green-top-hover: #2d6078; - --green-lighter: #254b5d; - --green-lightest: #4aa59f; - - --light-gray: #cfddd7; - --bg-gray: #fdfdfd; - --bg-gray-darker: #f2f7f7; - --bg-gray-darkest: #e8f0f0; - - --th-gray: #e1eeed; - --td-gray: #eff2f2; - --table-button-gray: #d3d3d3; - - --link-blue: #6b6bb9; - --button-red: #dc3545; -} - -html, body { - background-color: var(--bg-gray); -} - -.page-button { - width: 11em; - background-color: var(--green-lighter); - border-color: var(--green-lighter); -} - -.page-button:hover { - width: 11em; - background-color: var(--green-top-hover); - border-color: var(--green-top-hover); -} - -i { - cursor: pointer; -} - -a { - color: var(--link-blue); - text-decoration: none; -} - -a:hover { - color: var(--bs-body-color); -} - -.non-clickable { - cursor: default; -} - -.bi-chevron-bar-right { - -webkit-text-stroke: 1px; -} - -hr { - margin: 0; -} - -.left-align { - text-align: left !important; -} - -.link-muted:hover { - color: #6c757d !important; +:root { + --green-top: #254b5d; + --green-top-hover: #2d6078; + --green-lighter: #254b5d; + --green-lightest: #4aa59f; + + --light-gray: #cfddd7; + --bg-gray: #fdfdfd; + --bg-gray-darker: #f2f7f7; + --bg-gray-darkest: #e8f0f0; + + --th-gray: #e1eeed; + --td-gray: #eff2f2; + --table-button-gray: #d3d3d3; + + --link-blue: #6b6bb9; + --button-red: #dc3545; + + --button-gray-disabled: #cccccc +} + +html, body { + background-color: var(--bg-gray); +} + +.page-button { + width: 11em; + background-color: var(--green-lighter); + border-color: var(--green-lighter); +} + +.page-button:disabled { + width: 11em; + background-color: var(--button-gray-disabled); + border-color: var(--button-gray-disabled); +} + +.page-button:hover { + width: 11em; + background-color: var(--green-top-hover); + border-color: var(--green-top-hover); +} + +i { + cursor: pointer; +} + +a { + color: var(--link-blue); + text-decoration: none; +} + +a:hover { + color: var(--bs-body-color); +} + +.non-clickable { + cursor: default; +} + +.bi-chevron-bar-right { + -webkit-text-stroke: 1px; +} + +hr { + margin: 0; +} + +.left-align { + text-align: left !important; +} + +.link-muted:hover { + color: #6c757d !important; } \ No newline at end of file diff --git a/assets/2-homepage.css b/assets/2-homepage.css index bc4e02d5..4d8d88bb 100644 --- a/assets/2-homepage.css +++ b/assets/2-homepage.css @@ -1,106 +1,106 @@ -#logo { - height: 30px; -} - -#genome-ref-input-container { - background-color: var(--bg-gray-darker); -} - -#homepage-reset, #homepage-clear-cache { - background-color: white; -} - -#homepage-reset:hover, #homepage-clear-cache:hover { - background-color: var(--button-red); -} - -#homepage-submit { - background-color: var(--green-top); - border-color: var(--green-top); -} - -#homepage-submit:hover { - background-color: var(--green-top-hover); - border-color: var(--green-top-hover); -} - -#top-navbar { - background-color: var(--green-top) !important; -} - -.home-button { - width: 100%; -} - -#homepage-dash-nav { - border-radius: 10px; -} - -#homepage-dash-nav .nav-item:hover { - background-color: var(--bg-gray-darkest); - border-radius: 10px; -} - -.nav-link { - color: var(--bs-body-color) !important; - cursor: pointer; -} - -.nav-pills .nav-link.active { - color: white !important; - background-color: var(--green-lightest); -} - -.top-navbar-item { - color: var(--bs-nav-link-color) !important; -} - -.top-navbar-item.active { - color: white !important; -} - -#post-gwas-hdr { - margin-bottom: 1em; -} - -#genomic-interval-container { - margin-bottom: 0.6em; -} - -#genomic-interval-hdr { - display: inline; -} - -.sample-genomic-interval { - color: var(--link-blue); -} - -.sample-genomic-interval:hover { - text-decoration: underline; - cursor: pointer; -} - -#reset-analyses-container { - margin-left: 3.5em; - margin-right: 3.5em; -} - -.banner { - background-color: #214151; -} - -#demo-banner { - border-bottom: 1px solid white; -} - -#demo-banner i { - color: white !important; -} - -#demo-banner i.active { - color: white !important; -} - -#page { - margin-left: 3em; +#logo { + height: 30px; +} + +#genome-ref-input-container { + background-color: var(--bg-gray-darker); +} + +#homepage-reset, #homepage-clear-cache { + background-color: white; +} + +#homepage-reset:hover, #homepage-clear-cache:hover { + background-color: var(--button-red); +} + +#homepage-submit { + background-color: var(--green-top); + border-color: var(--green-top); +} + +#homepage-submit:hover { + background-color: var(--green-top-hover); + border-color: var(--green-top-hover); +} + +#top-navbar { + background-color: var(--green-top) !important; +} + +.home-button { + width: 100%; +} + +#homepage-dash-nav { + border-radius: 10px; +} + +#homepage-dash-nav .nav-item:hover { + background-color: var(--bg-gray-darkest); + border-radius: 10px; +} + +.nav-link { + color: var(--bs-body-color) !important; + cursor: pointer; +} + +.nav-pills .nav-link.active { + color: white !important; + background-color: var(--green-lightest); +} + +.top-navbar-item { + color: var(--bs-nav-link-color) !important; +} + +.top-navbar-item.active { + color: white !important; +} + +#post-gwas-hdr { + margin-bottom: 1em; +} + +#genomic-interval-container { + margin-bottom: 0.6em; +} + +#genomic-interval-hdr { + display: inline; +} + +.sample-genomic-interval { + color: var(--link-blue); +} + +.sample-genomic-interval:hover { + text-decoration: underline; + cursor: pointer; +} + +#reset-analyses-container { + margin-left: 3.5em; + margin-right: 3.5em; +} + +.banner { + background-color: #214151; +} + +#demo-banner { + border-bottom: 1px solid white; +} + +#demo-banner i { + color: white !important; +} + +#demo-banner i.active { + color: white !important; +} + +#page { + margin-left: 3em; } \ No newline at end of file diff --git a/assets/3-table.css b/assets/3-table.css index 2736dd36..1109eaf5 100644 --- a/assets/3-table.css +++ b/assets/3-table.css @@ -1,58 +1,58 @@ -table tr td { - vertical-align: top; -} - -table tr td p, table tr td div p { - margin-bottom: 0; - padding-bottom: 0; - display: inline; -} - -table tr td div { - text-align: right !important; -} - -th:not(.dash-filter) { - font-weight: bold !important; - background-color: var(--th-gray) !important; -} - -tr:nth-of-type(odd) td { - background-color: var(--td-gray) !important; -} - -th, td { - padding: 0.5em !important; - font-family: sans-serif; -} - -.page-number, .current-page { - font-family: var(--bs-body-font-family) !important; - font-size: var(--font-size-base) !important; -} - -.table-button { - border: 1px solid var(--table-button-gray); - background-color: transparent; -} - -.table-button:hover { - border: 1px solid var(--table-button-gray); - background-color: var(--td-gray); -} - -#text-mining-result-table th { - text-align: center; -} - -#text-mining-result-table table tr td div { - text-align: left !important; -} - -.dash-cell-value { - overflow-y: hidden !important; -} - -table tr td ul { - text-align: left !important; +table tr td { + vertical-align: top; +} + +table tr td p, table tr td div p { + margin-bottom: 0; + padding-bottom: 0; + display: inline; +} + +table tr td div { + text-align: right !important; +} + +th:not(.dash-filter) { + font-weight: bold !important; + background-color: var(--th-gray) !important; +} + +tr:nth-of-type(odd) td { + background-color: var(--td-gray) !important; +} + +th, td { + padding: 0.5em !important; + font-family: sans-serif; +} + +.page-number, .current-page { + font-family: var(--bs-body-font-family) !important; + font-size: var(--font-size-base) !important; +} + +.table-button { + border: 1px solid var(--table-button-gray); + background-color: transparent; +} + +.table-button:hover { + border: 1px solid var(--table-button-gray); + background-color: var(--td-gray); +} + +#text-mining-result-table th { + text-align: center; +} + +#text-mining-result-table table tr td div { + text-align: left !important; +} + +.dash-cell-value { + overflow-y: hidden !important; +} + +table tr td ul { + text-align: left !important; } \ No newline at end of file diff --git a/assets/4-input-elem.css b/assets/4-input-elem.css index 3cb90557..db7033b5 100644 --- a/assets/4-input-elem.css +++ b/assets/4-input-elem.css @@ -1,30 +1,30 @@ -.form-check-input:checked { - background-color: var(--green-lighter); - border-color: var(--green-lighter); -} - -.form-check-input:focus { - border-color: var(--light-gray); - box-shadow: 0 0 0 0.25rem var(--light-gray); -} - -.rc-slider-dot-active, .rc-slider-handle { - border-color: var(--green-lighter) !important; -} - -.rc-slider-track { - background-color: var(--green-lighter) !important; -} - -#coexpression-parameter-slider-container { - display: flex; - justify-content: center; -} - -#coexpression-parameter-slider { - width: 95%; -} - -#lift-over-overlap-table-filter { - margin-bottom: -0.5em; +.form-check-input:checked { + background-color: var(--green-lighter); + border-color: var(--green-lighter); +} + +.form-check-input:focus { + border-color: var(--light-gray); + box-shadow: 0 0 0 0.25rem var(--light-gray); +} + +.rc-slider-dot-active, .rc-slider-handle { + border-color: var(--green-lighter) !important; +} + +.rc-slider-track { + background-color: var(--green-lighter) !important; +} + +#coexpression-parameter-slider-container { + display: flex; + justify-content: center; +} + +#coexpression-parameter-slider { + width: 95%; +} + +#lift-over-overlap-table-filter { + margin-bottom: -0.5em; } \ No newline at end of file diff --git a/assets/5-graph.css b/assets/5-graph.css index 176b9da3..877a384c 100644 --- a/assets/5-graph.css +++ b/assets/5-graph.css @@ -1,6 +1,6 @@ -#coexpression-module-graph { - visibility: hidden; - width: 100%; - height: 100vh; - border: 1px solid var(--table-button-gray); +#coexpression-module-graph { + visibility: hidden; + width: 100%; + height: 100vh; + border: 1px solid var(--table-button-gray); } \ No newline at end of file diff --git a/assets/6-igv.css b/assets/6-igv.css index 3f2ac2dd..e5ef926d 100644 --- a/assets/6-igv.css +++ b/assets/6-igv.css @@ -1,3 +1,3 @@ -.igv-root-div, #igv-Nipponbare-local { - margin: 0 !important; +.igv-root-div, #igv-Nipponbare-local { + margin: 0 !important; } \ No newline at end of file diff --git a/assets/7-loading.css b/assets/7-loading.css index 2d897d1e..782bc77b 100644 --- a/assets/7-loading.css +++ b/assets/7-loading.css @@ -1,19 +1,19 @@ -/* This ensures that the loading affordance is displayed at the top of the container div. - Otherwise, it will be displayed at the center of the container div. */ - -#coexpression-results-module-tabs-container + div, -#coexpression-pathways + div, -#lift-over-results-table + div, -#text-mining-result-table + div, -#tf-enrichment-result-table + div, -#coexpression-module-graph + div, -#coexpression-graph-container { - display: block !important; - height: 0 !important; - margin-top: 0; - padding-top: 0; -} - -.dash-default-spinner > div { - background-color: var(--green-lighter) !important; +/* This ensures that the loading affordance is displayed at the top of the container div. + Otherwise, it will be displayed at the center of the container div. */ + +#coexpression-results-module-tabs-container + div, +#coexpression-pathways + div, +#lift-over-results-table + div, +#text-mining-result-table + div, +#tf-enrichment-result-table + div, +#coexpression-module-graph + div, +#coexpression-graph-container { + display: block !important; + height: 0 !important; + margin-top: 0; + padding-top: 0; +} + +.dash-default-spinner > div { + background-color: var(--green-lighter) !important; } \ No newline at end of file diff --git a/assets/8-tooltips.css b/assets/8-tooltips.css index eee3d505..f6e365ab 100644 --- a/assets/8-tooltips.css +++ b/assets/8-tooltips.css @@ -1,12 +1,12 @@ -.algo-desc { - display: block; - margin-bottom: 1em; -} - -.reference { - font-size: small; -} - -i[id$="-tooltip"] { - margin-left: 1em; +.algo-desc { + display: block; + margin-bottom: 1em; +} + +.reference { + font-size: small; +} + +i[id$="-tooltip"] { + margin-left: 1em; } \ No newline at end of file diff --git a/assets/9-analysis.css b/assets/9-analysis.css index ba32f9b7..1cbad7df 100644 --- a/assets/9-analysis.css +++ b/assets/9-analysis.css @@ -1,28 +1,28 @@ -.analysis-intro { - background-color: var(--bg-gray-darker); - border-radius: 10px; -} - -.analysis-intro p:last-child { - margin-bottom: 0; - padding-bottom: 0; -} - -#lift-over-results-statistics { - margin-bottom: 0; - padding-bottom: 0; -} - -#coexpression-table-stats { - text-align: left; -} - -.stats-icon { - color: #254b5d; -} - -.stats { - margin-left: 0; - padding-left: 0; - text-align: left; +.analysis-intro { + background-color: var(--bg-gray-darker); + border-radius: 10px; +} + +.analysis-intro p:last-child { + margin-bottom: 0; + padding-bottom: 0; +} + +#lift-over-results-statistics { + margin-bottom: 0; + padding-bottom: 0; +} + +#coexpression-table-stats { + text-align: left; +} + +.stats-icon { + color: #254b5d; +} + +.stats { + margin-left: 0; + padding-left: 0; + text-align: left; } \ No newline at end of file diff --git a/callbacks/branch.py b/callbacks/branch.py index e3a414ab..26a9cb80 100644 --- a/callbacks/branch.py +++ b/callbacks/branch.py @@ -1,25 +1,25 @@ -from pathlib import Path - - -def get_active_branch_name(): - """ - Lifted from https://stackoverflow.com/questions/26134026/how-to-get-the-current-checked-out-git-branch-name-through-pygit2 - """ - head_dir = Path(".") / ".git" / "HEAD" - with head_dir.open("r") as f: - content = f.read().splitlines() - - for line in content: - if line[0:4] == "ref:": - return line.partition("refs/heads/")[2] - - -def is_in_demo_branch(): - return get_active_branch_name() == 'demo' - - -def show_if_in_demo_branch(): - if is_in_demo_branch(): - return {'display': 'block'} - - return {'display': 'none'} +from pathlib import Path + + +def get_active_branch_name(): + """ + Lifted from https://stackoverflow.com/questions/26134026/how-to-get-the-current-checked-out-git-branch-name-through-pygit2 + """ + head_dir = Path(".") / ".git" / "HEAD" + with head_dir.open("r") as f: + content = f.read().splitlines() + + for line in content: + if line[0:4] == "ref:": + return line.partition("refs/heads/")[2] + + +def is_in_demo_branch(): + return get_active_branch_name() == 'demo' + + +def show_if_in_demo_branch(): + if is_in_demo_branch(): + return {'display': 'block'} + + return {'display': 'none'} diff --git a/callbacks/browse_loci/callbacks.py b/callbacks/browse_loci/callbacks.py index b1190e0a..c72d3f64 100644 --- a/callbacks/browse_loci/callbacks.py +++ b/callbacks/browse_loci/callbacks.py @@ -1,218 +1,218 @@ -import json -import dash_bio as dashbio - -from dash import Input, Output, State, html -from dash.exceptions import PreventUpdate -from flask import json, send_from_directory, abort -from werkzeug.exceptions import HTTPException - -from .util import * -from ..lift_over import util as lift_over_util -from ..file_util import * - -from ..constants import Constants -const = Constants() - - -def init_callback(app): - @app.callback( - Output('igv-genomic-intervals-input', 'children'), - State('homepage-genomic-intervals-submitted-input', 'data'), - Input('homepage-is-submitted', 'data'), - Input('igv-submit', 'n_clicks') - ) - def display_input(nb_intervals_str, homepage_is_submitted, *_): - if homepage_is_submitted: - if nb_intervals_str and not lift_over_util.is_error(lift_over_util.get_genomic_intervals_from_input(nb_intervals_str)): - return [html.B('Your Input Intervals: '), html.Span(nb_intervals_str)] - else: - return None - - raise PreventUpdate - - @app.callback( - Output('igv-is-submitted', 'data', allow_duplicate=True), - Output('igv-selected-genomic-intervals-submitted-input', 'data'), - Input('igv-submit', 'n_clicks'), - State('igv-genomic-intervals', 'value'), - State('igv-track-filter', 'value'), - State('homepage-is-submitted', 'data'), - prevent_initial_call=True - ) - def submit_igv_input(igv_submit_n_clicks, selected_nb_interval, selected_tracks, homepage_is_submitted): - if homepage_is_submitted and igv_submit_n_clicks >= 1: - return True, selected_nb_interval - - raise PreventUpdate - - @app.callback( - Output('igv-results-container', 'style'), - Input('igv-is-submitted', 'data') - ) - def display_igv_output(igv_is_submitted): - if igv_is_submitted: - return {'display': 'block'} - else: - return {'display': 'none'} - - # Lifted from https://flask.palletsprojects.com/en/2.2.x/errorhandling/#:~:text=When%20an%20error%20occurs%20in,user%20when%20an%20error%20occurs. - @app.server.errorhandler(HTTPException) - def handle_exception(e): - """Return JSON instead of HTML for HTTP errors.""" - # start with the correct headers and status code from the error - response = e.get_response() - # replace the body with JSON - response.data = json.dumps({ - "code": e.code, - "name": e.name, - "description": e.description, - }) - response.content_type = "application/json" - return response - - @app.server.route('/genomes_nipponbare/') - def send_genomes_nipponbare_url(filename): - try: - return send_from_directory(const.GENOMES_NIPPONBARE, filename) - except FileNotFoundError: - abort(404) - - @app.server.route('/annotations_nb////') - def send_annotations_nb_url(nb_intervals_str, foldername, selected_interval_str, file_format): - try: - temp_output_folder_dir = get_path_to_temp( - nb_intervals_str, const.TEMP_IGV, foldername) - - selected_interval_str_filename = convert_text_to_path( - selected_interval_str) - - selected_interval_str_file = f'{selected_interval_str_filename}.{file_format}' - - return send_from_directory(temp_output_folder_dir, selected_interval_str_file) - - except FileNotFoundError: - abort(404) - - @app.server.route('/open_chromatin_panicle/') - def send_open_chromatin_panicle_url(filename): - try: - return send_from_directory(const.OPEN_CHROMATIN_PANICLE, filename) - - except FileNotFoundError: - abort(404) - - @app.callback( - Output('igv-genomic-intervals', 'options'), - Output('igv-genomic-intervals', 'value'), - Input('homepage-genomic-intervals-submitted-input', 'data'), - - State('homepage-is-submitted', 'data'), - State('igv-selected-genomic-intervals-saved-input', 'data') - ) - def display_selected_genomic_intervals(nb_intervals_str, homepage_is_submitted, selected_nb_interval): - if homepage_is_submitted: - igv_options = nb_intervals_str.split(';') - - if not selected_nb_interval: - selected_nb_interval = igv_options[0] - - return igv_options, selected_nb_interval - - raise PreventUpdate - - @app.callback( - Output('igv-track-intro', 'children'), - Output('igv-track-filter', 'options'), - Output('igv-track-filter', 'value'), - Input('igv-selected-genomic-intervals-submitted-input', 'data'), - State('homepage-is-submitted', 'data'), - Input('igv-selected-tracks-submitted-input', 'data'), - State('igv-is-submitted', 'data') - ) - def display_igv_tracks_filter(nb_intervals_str, homepage_is_submitted, selected_tracks, igv_is_submitted): - if homepage_is_submitted and igv_is_submitted: - tracks = ['MSU V7 genes', 'chromatin open'] - - if not selected_tracks: - selected_tracks = [tracks[0]] - - return 'Select the tracks to be displayed', \ - tracks, selected_tracks - raise PreventUpdate - - @app.callback( - Output('igv-display', 'children'), - State('igv-selected-genomic-intervals-submitted-input', 'data'), - Input('igv-selected-tracks-submitted-input', 'data'), - State('homepage-is-submitted', 'data'), - State('igv-is-submitted', 'data'), - State('homepage-genomic-intervals-submitted-input', 'data') - ) - def display_igv(selected_nb_intervals_str, selected_tracks, homepage_is_submitted, igv_is_submitted, nb_intervals_str): - if homepage_is_submitted and igv_is_submitted: - track_info = [ - { - "name": "MSU V7 genes", - "format": "gff3", - "description": " Rice Genome Annotation Project", - "url": f"annotations_nb/{nb_intervals_str}/IRGSPMSU.gff.db/{selected_nb_intervals_str}/gff", - "displayMode": "EXPANDED", - "height": 200 - }, - { - "name": "chromatin open", - "format": "bed", - "description": " Rice Genome Annotation Project", - "url": f"open_chromatin_panicle/SRR7126116_ATAC-Seq_Panicles.bed", - "displayMode": "EXPANDED", - "height": 200 - } - ] - - display_tracks = [ - track for track in track_info if selected_tracks and track['name'] in selected_tracks] - - return html.Div([ - dashbio.Igv( - id='igv-Nipponbare-local', - reference={ - "id": "GCF_001433935.1", - "name": "O. sativa IRGSP-1.0 (GCF_001433935.1)", - "fastaURL": "genomes_nipponbare/Npb.fasta", - "indexURL": "genomes_nipponbare/Npb.fasta.fai", - "tracks": display_tracks - }, - locus=[selected_nb_intervals_str] - ) - ]) - - raise PreventUpdate - - @app.callback( - Output('igv-selected-genomic-intervals-saved-input', - 'data', allow_duplicate=True), - Input('igv-genomic-intervals', 'value'), - State('homepage-is-submitted', 'data'), - Input('igv-track-filter', 'value'), - - prevent_initial_call=True - ) - def set_input_igv_session_state(selected_nb_intervals_str, homepage_is_submitted, *_): - if homepage_is_submitted: - return selected_nb_intervals_str - - raise PreventUpdate - - @app.callback( - Output('igv-selected-tracks-submitted-input', - 'data', allow_duplicate=True), - Input('igv-track-filter', 'value'), - State('homepage-is-submitted', 'data'), - State('igv-is-submitted', 'data'), - prevent_initial_call=True - ) - def set_submitted_igv_session_state(selected_tracks, homepage_is_submitted, igv_is_submitted): - if homepage_is_submitted and igv_is_submitted: - return selected_tracks - - raise PreventUpdate +import json +import dash_bio as dashbio + +from dash import Input, Output, State, html +from dash.exceptions import PreventUpdate +from flask import json, send_from_directory, abort +from werkzeug.exceptions import HTTPException + +from .util import * +from ..lift_over import util as lift_over_util +from ..file_util import * + +from ..constants import Constants +const = Constants() + + +def init_callback(app): + @app.callback( + Output('igv-genomic-intervals-input', 'children'), + State('homepage-genomic-intervals-submitted-input', 'data'), + Input('homepage-is-submitted', 'data'), + Input('igv-submit', 'n_clicks') + ) + def display_input(nb_intervals_str, homepage_is_submitted, *_): + if homepage_is_submitted: + if nb_intervals_str and not lift_over_util.is_error(lift_over_util.get_genomic_intervals_from_input(nb_intervals_str)): + return [html.B('Your Input Intervals: '), html.Span(nb_intervals_str)] + else: + return None + + raise PreventUpdate + + @app.callback( + Output('igv-is-submitted', 'data', allow_duplicate=True), + Output('igv-selected-genomic-intervals-submitted-input', 'data'), + Input('igv-submit', 'n_clicks'), + State('igv-genomic-intervals', 'value'), + State('igv-track-filter', 'value'), + State('homepage-is-submitted', 'data'), + prevent_initial_call=True + ) + def submit_igv_input(igv_submit_n_clicks, selected_nb_interval, selected_tracks, homepage_is_submitted): + if homepage_is_submitted and igv_submit_n_clicks >= 1: + return True, selected_nb_interval + + raise PreventUpdate + + @app.callback( + Output('igv-results-container', 'style'), + Input('igv-is-submitted', 'data') + ) + def display_igv_output(igv_is_submitted): + if igv_is_submitted: + return {'display': 'block'} + else: + return {'display': 'none'} + + # Lifted from https://flask.palletsprojects.com/en/2.2.x/errorhandling/#:~:text=When%20an%20error%20occurs%20in,user%20when%20an%20error%20occurs. + @app.server.errorhandler(HTTPException) + def handle_exception(e): + """Return JSON instead of HTML for HTTP errors.""" + # start with the correct headers and status code from the error + response = e.get_response() + # replace the body with JSON + response.data = json.dumps({ + "code": e.code, + "name": e.name, + "description": e.description, + }) + response.content_type = "application/json" + return response + + @app.server.route('/genomes_nipponbare/') + def send_genomes_nipponbare_url(filename): + try: + return send_from_directory(const.GENOMES_NIPPONBARE, filename) + except FileNotFoundError: + abort(404) + + @app.server.route('/annotations_nb////') + def send_annotations_nb_url(nb_intervals_str, foldername, selected_interval_str, file_format): + try: + temp_output_folder_dir = get_path_to_temp( + nb_intervals_str, const.TEMP_IGV, foldername) + + selected_interval_str_filename = convert_text_to_path( + selected_interval_str) + + selected_interval_str_file = f'{selected_interval_str_filename}.{file_format}' + + return send_from_directory(temp_output_folder_dir, selected_interval_str_file) + + except FileNotFoundError: + abort(404) + + @app.server.route('/open_chromatin_panicle/') + def send_open_chromatin_panicle_url(filename): + try: + return send_from_directory(const.OPEN_CHROMATIN_PANICLE, filename) + + except FileNotFoundError: + abort(404) + + @app.callback( + Output('igv-genomic-intervals', 'options'), + Output('igv-genomic-intervals', 'value'), + Input('homepage-genomic-intervals-submitted-input', 'data'), + + State('homepage-is-submitted', 'data'), + State('igv-selected-genomic-intervals-saved-input', 'data') + ) + def display_selected_genomic_intervals(nb_intervals_str, homepage_is_submitted, selected_nb_interval): + if homepage_is_submitted: + igv_options = nb_intervals_str.split(';') + + if not selected_nb_interval: + selected_nb_interval = igv_options[0] + + return igv_options, selected_nb_interval + + raise PreventUpdate + + @app.callback( + Output('igv-track-intro', 'children'), + Output('igv-track-filter', 'options'), + Output('igv-track-filter', 'value'), + Input('igv-selected-genomic-intervals-submitted-input', 'data'), + State('homepage-is-submitted', 'data'), + Input('igv-selected-tracks-submitted-input', 'data'), + State('igv-is-submitted', 'data') + ) + def display_igv_tracks_filter(nb_intervals_str, homepage_is_submitted, selected_tracks, igv_is_submitted): + if homepage_is_submitted and igv_is_submitted: + tracks = ['MSU V7 genes', 'chromatin open'] + + if not selected_tracks: + selected_tracks = [tracks[0]] + + return 'Select the tracks to be displayed', \ + tracks, selected_tracks + raise PreventUpdate + + @app.callback( + Output('igv-display', 'children'), + State('igv-selected-genomic-intervals-submitted-input', 'data'), + Input('igv-selected-tracks-submitted-input', 'data'), + State('homepage-is-submitted', 'data'), + State('igv-is-submitted', 'data'), + State('homepage-genomic-intervals-submitted-input', 'data') + ) + def display_igv(selected_nb_intervals_str, selected_tracks, homepage_is_submitted, igv_is_submitted, nb_intervals_str): + if homepage_is_submitted and igv_is_submitted: + track_info = [ + { + "name": "MSU V7 genes", + "format": "gff3", + "description": " Rice Genome Annotation Project", + "url": f"annotations_nb/{nb_intervals_str}/IRGSPMSU.gff.db/{selected_nb_intervals_str}/gff", + "displayMode": "EXPANDED", + "height": 200 + }, + { + "name": "chromatin open", + "format": "bed", + "description": " Rice Genome Annotation Project", + "url": f"open_chromatin_panicle/SRR7126116_ATAC-Seq_Panicles.bed", + "displayMode": "EXPANDED", + "height": 200 + } + ] + + display_tracks = [ + track for track in track_info if selected_tracks and track['name'] in selected_tracks] + + return html.Div([ + dashbio.Igv( + id='igv-Nipponbare-local', + reference={ + "id": "GCF_001433935.1", + "name": "O. sativa IRGSP-1.0 (GCF_001433935.1)", + "fastaURL": "genomes_nipponbare/Npb.fasta", + "indexURL": "genomes_nipponbare/Npb.fasta.fai", + "tracks": display_tracks + }, + locus=[selected_nb_intervals_str] + ) + ]) + + raise PreventUpdate + + @app.callback( + Output('igv-selected-genomic-intervals-saved-input', + 'data', allow_duplicate=True), + Input('igv-genomic-intervals', 'value'), + State('homepage-is-submitted', 'data'), + Input('igv-track-filter', 'value'), + + prevent_initial_call=True + ) + def set_input_igv_session_state(selected_nb_intervals_str, homepage_is_submitted, *_): + if homepage_is_submitted: + return selected_nb_intervals_str + + raise PreventUpdate + + @app.callback( + Output('igv-selected-tracks-submitted-input', + 'data', allow_duplicate=True), + Input('igv-track-filter', 'value'), + State('homepage-is-submitted', 'data'), + State('igv-is-submitted', 'data'), + prevent_initial_call=True + ) + def set_submitted_igv_session_state(selected_tracks, homepage_is_submitted, igv_is_submitted): + if homepage_is_submitted and igv_is_submitted: + return selected_tracks + + raise PreventUpdate diff --git a/callbacks/browse_loci/util.py b/callbacks/browse_loci/util.py index 10c1a661..729d5742 100644 --- a/callbacks/browse_loci/util.py +++ b/callbacks/browse_loci/util.py @@ -1,59 +1,59 @@ -from ..lift_over import util -import gffutils -import pandas as pd -import os -from ..file_util import * -from ..constants import Constants - -const = Constants() - - -def write_igv_tracks_to_file(nb_intervals_str): - # tracks found in igv - track_db = [[const.ANNOTATIONS_NB, 'IRGSPMSU.gff.db', 'gff'], - [const.OPEN_CHROMATIN_PANICLE, 'SRR7126116_ATAC-Seq_Panicles.bed', 'bed']] - - # write to file the data for igv - for db in track_db: - file_ext = db[2] - - if file_ext == 'gff': - source_dir = f'{db[0]}/{db[1]}' - source_file = db[1] - - write_gff_igv_track_to_file( - source_dir, source_file, nb_intervals_str) - - -def write_gff_igv_track_to_file(source_dir, source_file, nb_intervals_str): - if path_exists(source_dir): - loci_list = nb_intervals_str.split(';') - genomic_interval_list = util.get_genomic_intervals_from_input( - nb_intervals_str) - - temp_folder = get_path_to_temp( - nb_intervals_str, const.TEMP_IGV, source_file) - make_dir(temp_folder) - - for i in range(len(loci_list)): - cur_loci = loci_list[i] - - dest_file = f'{convert_text_to_path(cur_loci)}.gff' - dest_dir = f'{temp_folder}/{dest_file}' - - if not path_exists(dest_dir): - genes_in_interval = get_loci_data_in_gff_file( - source_dir, genomic_interval_list[i]) - - with open(dest_dir, 'w') as fp: - for line in genes_in_interval: - fp.write('%s\n' % line) - - -def get_loci_data_in_gff_file(source_dir, nb_interval): - db = gffutils.FeatureDB(f'{source_dir}', keep_order=True) - - genes_in_interval = list(db.region(region=(nb_interval.chrom, nb_interval.start, nb_interval.stop), - completely_within=False, featuretype='gene')) - - return genes_in_interval +from ..lift_over import util +import gffutils +import pandas as pd +import os +from ..file_util import * +from ..constants import Constants + +const = Constants() + + +def write_igv_tracks_to_file(nb_intervals_str): + # tracks found in igv + track_db = [[const.ANNOTATIONS_NB, 'IRGSPMSU.gff.db', 'gff'], + [const.OPEN_CHROMATIN_PANICLE, 'SRR7126116_ATAC-Seq_Panicles.bed', 'bed']] + + # write to file the data for igv + for db in track_db: + file_ext = db[2] + + if file_ext == 'gff': + source_dir = f'{db[0]}/{db[1]}' + source_file = db[1] + + write_gff_igv_track_to_file( + source_dir, source_file, nb_intervals_str) + + +def write_gff_igv_track_to_file(source_dir, source_file, nb_intervals_str): + if path_exists(source_dir): + loci_list = nb_intervals_str.split(';') + genomic_interval_list = util.get_genomic_intervals_from_input( + nb_intervals_str) + + temp_folder = get_path_to_temp( + nb_intervals_str, const.TEMP_IGV, source_file) + make_dir(temp_folder) + + for i in range(len(loci_list)): + cur_loci = loci_list[i] + + dest_file = f'{convert_text_to_path(cur_loci)}.gff' + dest_dir = f'{temp_folder}/{dest_file}' + + if not path_exists(dest_dir): + genes_in_interval = get_loci_data_in_gff_file( + source_dir, genomic_interval_list[i]) + + with open(dest_dir, 'w') as fp: + for line in genes_in_interval: + fp.write('%s\n' % line) + + +def get_loci_data_in_gff_file(source_dir, nb_interval): + db = gffutils.FeatureDB(f'{source_dir}', keep_order=True) + + genes_in_interval = list(db.region(region=(nb_interval.chrom, nb_interval.start, nb_interval.stop), + completely_within=False, featuretype='gene')) + + return genes_in_interval diff --git a/callbacks/coexpression/callbacks.py b/callbacks/coexpression/callbacks.py index d4e84740..44770564 100644 --- a/callbacks/coexpression/callbacks.py +++ b/callbacks/coexpression/callbacks.py @@ -1,515 +1,515 @@ -from dash import Input, Output, State, html, dcc -from dash.exceptions import PreventUpdate -from collections import namedtuple - -from .util import * -from ..lift_over import util as lift_over_util -from ..branch import * - -Input_parameter_module = namedtuple('Input_parameter_module', [ - 'param_slider_marks', 'param_slider_value']) - -Submitted_parameter_module = namedtuple('Submitted_parameter_module', [ - 'param_slider_marks', 'param_slider_value', 'param_module', 'layout', 'pathway_active_tab']) - - -def init_callback(app): - @app.callback( - Output('coexpression-genomic-intervals-input', 'children'), - State('homepage-genomic-intervals-submitted-input', 'data'), - Input('homepage-is-submitted', 'data'), - Input('coexpression-submit', 'n_clicks') - ) - def display_input(nb_intervals_str, homepage_is_submitted, *_): - if homepage_is_submitted: - if nb_intervals_str and not lift_over_util.is_error(lift_over_util.get_genomic_intervals_from_input(nb_intervals_str)): - return [html.B('Your Input Intervals: '), html.Span(nb_intervals_str)] - - return None - - raise PreventUpdate - - @app.callback( - Output('coexpression-is-submitted', 'data', allow_duplicate=True), - Output('coexpression-submitted-addl-genes', - 'data', allow_duplicate=True), - Output('coexpression-combined-genes', - 'data', allow_duplicate=True), - - Output('coexpression-submitted-network', - 'data', allow_duplicate=True), - Output('coexpression-submitted-clustering-algo', - 'data', allow_duplicate=True), - Output('coexpression-submitted-parameter-module', - 'data', allow_duplicate=True), - - Input('coexpression-submit', 'n_clicks'), - State('homepage-is-submitted', 'data'), - - State('lift-over-nb-table', 'data'), - - State('coexpression-addl-genes', 'value'), - State('coexpression-network', 'value'), - State('coexpression-clustering-algo', 'value'), - State('coexpression-parameter-slider', 'marks'), - State('coexpression-parameter-slider', 'value'), - prevent_initial_call=True - ) - def submit_coexpression_input(coexpression_submit_n_clicks, homepage_is_submitted, - implicated_gene_ids, submitted_addl_genes, - submitted_network, submitted_algo, submitted_slider_marks, submitted_slider_value): - if homepage_is_submitted and coexpression_submit_n_clicks >= 1: - paramater_module_value = Submitted_parameter_module( - submitted_slider_marks, submitted_slider_value, '', 'circle', 'tab-0')._asdict() - - submitted_parameter_module = { - submitted_algo: paramater_module_value} - - if submitted_addl_genes: - submitted_addl_genes = submitted_addl_genes.strip() - else: - submitted_addl_genes = '' - - list_addl_genes = list( - filter(None, [gene.strip() for gene in submitted_addl_genes.split(';')])) - - gene_ids = list(set.union( - set(implicated_gene_ids), set(list_addl_genes))) - - return True, submitted_addl_genes, gene_ids, submitted_network, submitted_algo, submitted_parameter_module - - raise PreventUpdate - - @app.callback( - Output('coexpression-results-container', 'style'), - Input('coexpression-is-submitted', 'data'), - ) - def display_coexpression_output(coexpression_is_submitted): - if coexpression_is_submitted: - return {'display': 'block'} - - else: - return {'display': 'none'} - - @app.callback( - Output('coexpression-parameter-slider', 'marks'), - Output('coexpression-parameter-slider', 'value'), - Input('coexpression-clustering-algo', 'value'), - State('coexpression-parameter-module-saved-input', 'data') - ) - def set_parameter_slider(algo, parameter_module): - if parameter_module and algo in parameter_module: - return parameter_module[algo]['param_slider_marks'], parameter_module[algo]['param_slider_value'] - - return get_parameters_for_algo(algo), module_detection_algos[algo].default_param * module_detection_algos[algo].multiplier - - @app.callback( - Output('coexpression-module-graph', 'elements'), - Output('coexpression-module-graph', 'layout'), - Output('coexpression-module-graph', 'style', allow_duplicate=True), - Output('coexpression-graph-container', 'style'), - - Input('coexpression-combined-genes', 'data'), - - Input('coexpression-submitted-network', 'data'), - Input('coexpression-submitted-clustering-algo', 'data'), - State('coexpression-is-submitted', 'data'), - State('coexpression-submitted-parameter-module', 'data'), - - prevent_initial_call=True - ) - def hide_table_graph(combined_gene_ids, submitted_network, submitted_algo, coexpression_is_submitted, submitted_parameter_module): - if coexpression_is_submitted: - if submitted_algo and submitted_algo in submitted_parameter_module: - parameters = submitted_parameter_module[submitted_algo]['param_slider_value'] - layout = submitted_parameter_module[submitted_algo]['layout'] - - return load_module_graph( - combined_gene_ids, None, submitted_network, submitted_algo, parameters, layout) + ({'visibility': 'hidden'}, ) - - raise PreventUpdate - - @app.callback( - Output('coexpression-table-container', 'style', allow_duplicate=True), - Input('coexpression-submit', 'n_clicks'), - - prevent_initial_call=True - ) - def hide_table(*_): - return {'visibility': 'hidden'} - - @app.callback( - Output('coexpression-module-graph', 'style', allow_duplicate=True), - Input('coexpression-modules', 'value'), - - prevent_initial_call=True - ) - def hide_graph(*_): - return {'visibility': 'hidden'} - - @app.callback( - Output('coexpression-modules', 'options'), - Output('coexpression-modules', 'value'), - Output('coexpression-results-module-tabs-container', 'style'), - Output('coexpression-module-stats', 'children'), - - State('homepage-genomic-intervals-submitted-input', 'data'), - - Input('coexpression-combined-genes', 'data'), - Input('coexpression-submitted-addl-genes', 'data'), - - Input('coexpression-submitted-network', 'data'), - Input('coexpression-submitted-clustering-algo', 'data'), - State('homepage-is-submitted', 'data'), - State('coexpression-submitted-parameter-module', 'data'), - State('coexpression-is-submitted', 'data') - ) - def perform_module_enrichment(genomic_intervals, combined_gene_ids, submitted_addl_genes, - submitted_network, submitted_algo, homepage_is_submitted, submitted_parameter_module, coexpression_is_submitted): - if homepage_is_submitted: - if coexpression_is_submitted: - if submitted_algo and submitted_algo in submitted_parameter_module: - parameters = submitted_parameter_module[submitted_algo]['param_slider_value'] - - enriched_modules = do_module_enrichment_analysis( - combined_gene_ids, genomic_intervals, submitted_addl_genes, submitted_network, submitted_algo, parameters) - - # Display statistics - num_enriched_modules = len(enriched_modules) - total_num_modules = count_modules( - submitted_network, submitted_algo, parameters) - stats = f'{num_enriched_modules} out of {total_num_modules} ' - if total_num_modules == 1: - stats += 'module ' - else: - stats += 'modules ' - - if num_enriched_modules == 1: - stats += 'was found to be enriched (adjusted p-value < 0.05).' - else: - stats += 'were found to be enriched (adjusted p-value < 0.05).' - - first_module = None - if enriched_modules: - first_module = enriched_modules[0] - else: - return enriched_modules, first_module, {'display': 'none'}, stats - - if submitted_parameter_module and submitted_algo in submitted_parameter_module: - if submitted_parameter_module[submitted_algo]['param_module']: - first_module = submitted_parameter_module[submitted_algo]['param_module'] - - return enriched_modules, first_module, {'display': 'block'}, stats - - raise PreventUpdate - - @app.callback( - Output('coexpression-pathways', 'data'), - Output('coexpression-pathways', 'columns'), - Output('coexpression-graph-stats', 'children'), - Output('coexpression-table-stats', 'children'), - - Output('coexpression-table-container', 'style'), - - Input('coexpression-combined-genes', 'data'), - Input('coexpression-submitted-network', 'data'), - Input('coexpression-submitted-clustering-algo', 'data'), - Input('coexpression-modules-pathway', 'active_tab'), - Input('coexpression-modules', 'value'), - State('coexpression-submitted-parameter-module', 'data'), - State('coexpression-is-submitted', 'data') - ) - def display_pathways(combined_gene_ids, - submitted_network, submitted_algo, active_tab, module, submitted_parameter_module, coexpression_is_submitted): - if coexpression_is_submitted: - if submitted_network and submitted_algo and submitted_algo in submitted_parameter_module: - parameters = submitted_parameter_module[submitted_algo]['param_slider_value'] - - try: - module_idx = module.split(' ')[1] - table, _ = convert_to_df( - active_tab, module_idx, submitted_network, submitted_algo, parameters) - except Exception: - table, _ = convert_to_df( - active_tab, None, submitted_network, submitted_algo, parameters) - - columns = [{'id': x, 'name': x, 'presentation': 'markdown'} - for x in table.columns] - - num_enriched = get_num_unique_entries(table, 'ID') - if num_enriched == 1: - stats = f'This module is enriched in {num_enriched} {get_noun_for_active_tab(active_tab).singular}.' - else: - stats = f'This module is enriched in {num_enriched} {get_noun_for_active_tab(active_tab).plural}.' - - graph_stats = 'The selected module has ' - try: - total_num_genes, num_combined_gene_ids = count_genes_in_module( - combined_gene_ids, int(module_idx), submitted_network, submitted_algo, parameters) - except UnboundLocalError: - total_num_genes, num_combined_gene_ids = 0, 0 - - if total_num_genes == 1: - graph_stats += f'{total_num_genes} gene, of which {num_combined_gene_ids} ' - else: - graph_stats += f'{total_num_genes} genes, of which {num_combined_gene_ids} ' - - if num_combined_gene_ids == 1: - graph_stats += 'is implicated by your GWAS/QTL or part of the gene list you manually entered.' - else: - graph_stats += 'are implicated by your GWAS/QTL or part of the gene list you manually entered.' - - if total_num_genes == 0: - return table.to_dict('records'), columns, graph_stats, stats, {'display': 'none'} - else: - return table.to_dict('records'), columns, graph_stats, stats, {'visibility': 'visible'} - - raise PreventUpdate - - @app.callback( - Output('coexpression-module-graph', 'elements', allow_duplicate=True), - Output('coexpression-module-graph', 'layout', allow_duplicate=True), - Output('coexpression-module-graph', 'style', allow_duplicate=True), - Output('coexpression-graph-container', 'style', allow_duplicate=True), - Output('coexpression-extra-bottom-div', 'style', allow_duplicate=True), - - Input('coexpression-combined-genes', 'data'), - Input('coexpression-modules', 'value'), - - State('coexpression-submitted-network', 'data'), - State('coexpression-submitted-clustering-algo', 'data'), - State('coexpression-submitted-parameter-module', 'data'), - - Input('coexpression-graph-layout', 'value'), - State('coexpression-is-submitted', 'data'), - - State('coexpression-modules', 'options'), - - Input('coexpression-reset-graph', 'n_clicks'), - - prevent_initial_call=True - ) - def display_table_graph(combined_gene_ids, module, submitted_network, submitted_algo, submitted_parameter_module, - layout, coexpression_is_submitted, modules, *_): - if coexpression_is_submitted: - if submitted_network and submitted_algo and submitted_algo in submitted_parameter_module: - parameters = submitted_parameter_module[submitted_algo]['param_slider_value'] - - if not modules: - module_graph = load_module_graph( - combined_gene_ids, None, submitted_network, submitted_algo, parameters, layout) - else: - module_graph = load_module_graph( - combined_gene_ids, module, submitted_network, submitted_algo, parameters, layout) - - # No enriched modules - if not modules: - return module_graph + ({'display': 'none'}, {'height': '0em'}) - - return module_graph + ({'visibility': 'visible', 'width': '100%', - 'height': '100vh'}, {'height': '1.5em'}) - - raise PreventUpdate - - @app.callback( - Output('coexpression-addl-genes-saved-input', - 'data', allow_duplicate=True), - Output('coexpression-network-saved-input', - 'data', allow_duplicate=True), - Output('coexpression-clustering-algo-saved-input', - 'data', allow_duplicate=True), - Output('coexpression-parameter-module-saved-input', - 'data', allow_duplicate=True), - - State('coexpression-addl-genes', 'value'), - Input('coexpression-network', 'value'), - Input('coexpression-clustering-algo', 'value'), - Input('coexpression-parameter-slider', 'value'), - State('coexpression-parameter-slider', 'marks'), - State('homepage-is-submitted', 'data'), - State('coexpression-parameter-module-saved-input', 'data'), - prevent_initial_call='True' - ) - def set_input_coexpression_session_state(addl_genes, network, algo, parameter_value, parameter_mark, homepage_is_submitted, input_parameter_module): - if homepage_is_submitted: - input_paramater_module_value = Input_parameter_module( - parameter_mark, parameter_value)._asdict() - - if input_parameter_module: - input_parameter_module[algo] = input_paramater_module_value - - else: - input_parameter_module = {algo: input_paramater_module_value} - - return addl_genes, network, algo, input_parameter_module - - raise PreventUpdate - - @app.callback( - Output('coexpression-submitted-parameter-module', - 'data', allow_duplicate=True), - - Input('coexpression-modules', 'value'), - Input('coexpression-graph-layout', 'value'), - Input('coexpression-modules-pathway', 'active_tab'), - - State('coexpression-submitted-network', 'data'), - State('coexpression-submitted-clustering-algo', 'data'), - State('homepage-is-submitted', 'data'), - State('coexpression-submitted-parameter-module', 'data'), - prevent_initial_call=True - ) - def set_submitted_coexpression_session_state(module, layout, active_tab, submitted_network, submitted_algo, homepage_is_submitted, submitted_parameter_module): - if homepage_is_submitted: - if submitted_network and submitted_parameter_module and submitted_algo in submitted_parameter_module: - submitted_parameter_module[submitted_algo]['param_module'] = module - submitted_parameter_module[submitted_algo]['layout'] = layout - submitted_parameter_module[submitted_algo]['pathway_active_tab'] = active_tab - - return submitted_parameter_module - - raise PreventUpdate - - @app.callback( - Output('coexpression-addl-genes', 'value'), - - State('homepage-is-submitted', 'data'), - State('coexpression-addl-genes-saved-input', 'data'), - - Input('homepage-genomic-intervals-submitted-input', 'data') - ) - def display_submitted_addl_genes(homepage_is_submitted, addl_genes, *_): - if homepage_is_submitted: - if not addl_genes: - return '' - - return addl_genes - - raise PreventUpdate - - @app.callback( - Output('coexpression-network', 'value'), - - State('homepage-is-submitted', 'data'), - State('coexpression-network-saved-input', 'data'), - - Input('homepage-genomic-intervals-submitted-input', 'data') - ) - def display_selected_coexpression_network(homepage_is_submitted, network, *_): - if homepage_is_submitted: - if not network: - return 'OS-CX' - - return network - - raise PreventUpdate - - @app.callback( - Output('coexpression-clustering-algo', 'value'), - - State('homepage-is-submitted', 'data'), - State('coexpression-clustering-algo-saved-input', 'data'), - - Input('homepage-genomic-intervals-submitted-input', 'data') - ) - def get_selected_clustering_algo(homepage_is_submitted, algo, *_): - if homepage_is_submitted: - if not algo: - return 'clusterone' - - return algo - - raise PreventUpdate - - @app.callback( - Output('coexpression-graph-layout', 'value'), - Output('coexpression-modules-pathway', 'active_tab'), - - Input('coexpression-submitted-network', 'data'), - Input('coexpression-submitted-clustering-algo', 'data'), - State('coexpression-is-submitted', 'data'), - State('coexpression-submitted-parameter-module', 'data') - ) - def display_selected_graph_layout(submitted_network, submitted_algo, coexpression_is_submitted, submitted_parameter_module): - if coexpression_is_submitted: - if submitted_network and submitted_algo and submitted_algo in submitted_parameter_module: - layout = 'circle' - if submitted_parameter_module[submitted_algo]['layout']: - layout = submitted_parameter_module[submitted_algo]['layout'] - - active_tab = 'tab-0' - if submitted_parameter_module[submitted_algo]['pathway_active_tab']: - active_tab = submitted_parameter_module[submitted_algo]['pathway_active_tab'] - - return layout, active_tab - - raise PreventUpdate - - @app.callback( - Output('coexpression-input', 'children'), - Input('coexpression-is-submitted', 'data'), - State('coexpression-addl-genes', 'value'), - State('coexpression-network', 'value'), - State('coexpression-clustering-algo', 'value'), - State('coexpression-parameter-slider', 'value') - ) - def display_coexpression_submitted_input(coexpression_is_submitted, genes, network, algo, parameters): - if coexpression_is_submitted: - if not genes: - genes = 'None' - else: - genes = '; '.join( - list(filter(None, [gene.strip() for gene in genes.split(';')]))) - - return [html.B('Additional Genes: '), genes, - html.Br(), - html.B('Selected Co-Expression Network: '), get_user_facing_network( - network), - html.Br(), - html.B('Selected Module Detection Algorithm: '), get_user_facing_algo( - algo), - html.Br(), - html.B('Selected Algorithm Parameter: '), get_user_facing_parameter(algo, parameters)] - - raise PreventUpdate - - @app.callback( - Output('coexpression-clustering-algo-modal', 'is_open'), - Input('coexpression-clustering-algo-tooltip', 'n_clicks') - ) - def open_modals(tooltip_n_clicks): - if tooltip_n_clicks > 0: - return True - - @app.callback( - Output('coexpression-pathways', 'filter_query'), - Input('coexpression-modules-pathway', 'active_tab'), - Input('coexpression-reset-table', 'n_clicks') - ) - def reset_table_filters(*_): - return '' - - @app.callback( - Output('coexpression-download-df-to-csv', 'data'), - Input('coexpression-export-table', 'n_clicks'), - State('coexpression-pathways', 'data'), - State('homepage-genomic-intervals-submitted-input', 'data') - ) - def download_coexpression_table_to_csv(download_n_clicks, coexpression_df, genomic_intervals): - if download_n_clicks >= 1: - df = pd.DataFrame(coexpression_df) - return dcc.send_data_frame(df.to_csv, f'[{genomic_intervals}] Co-Expression Network Analysis Table.csv', index=False) - - raise PreventUpdate - - @app.callback( - Output('coexpression-download-graph-to-json', 'data'), - Input('coexpression-export-graph', 'n_clicks'), - State('coexpression-module-graph', 'elements'), - State('homepage-genomic-intervals-submitted-input', 'data') - ) - def download_coexpression_table_to_csv(download_n_clicks, coexpression_dict, genomic_intervals): - if download_n_clicks >= 1: - return dict(content='Hello world!', filename=f'[{genomic_intervals}] Co-Expression Network Analysis Graph.txt') - - raise PreventUpdate +from dash import Input, Output, State, html, dcc +from dash.exceptions import PreventUpdate +from collections import namedtuple + +from .util import * +from ..lift_over import util as lift_over_util +from ..branch import * + +Input_parameter_module = namedtuple('Input_parameter_module', [ + 'param_slider_marks', 'param_slider_value']) + +Submitted_parameter_module = namedtuple('Submitted_parameter_module', [ + 'param_slider_marks', 'param_slider_value', 'param_module', 'layout', 'pathway_active_tab']) + + +def init_callback(app): + @app.callback( + Output('coexpression-genomic-intervals-input', 'children'), + State('homepage-genomic-intervals-submitted-input', 'data'), + Input('homepage-is-submitted', 'data'), + Input('coexpression-submit', 'n_clicks') + ) + def display_input(nb_intervals_str, homepage_is_submitted, *_): + if homepage_is_submitted: + if nb_intervals_str and not lift_over_util.is_error(lift_over_util.get_genomic_intervals_from_input(nb_intervals_str)): + return [html.B('Your Input Intervals: '), html.Span(nb_intervals_str)] + + return None + + raise PreventUpdate + + @app.callback( + Output('coexpression-is-submitted', 'data', allow_duplicate=True), + Output('coexpression-submitted-addl-genes', + 'data', allow_duplicate=True), + Output('coexpression-combined-genes', + 'data', allow_duplicate=True), + + Output('coexpression-submitted-network', + 'data', allow_duplicate=True), + Output('coexpression-submitted-clustering-algo', + 'data', allow_duplicate=True), + Output('coexpression-submitted-parameter-module', + 'data', allow_duplicate=True), + + Input('coexpression-submit', 'n_clicks'), + State('homepage-is-submitted', 'data'), + + State('lift-over-nb-table', 'data'), + + State('coexpression-addl-genes', 'value'), + State('coexpression-network', 'value'), + State('coexpression-clustering-algo', 'value'), + State('coexpression-parameter-slider', 'marks'), + State('coexpression-parameter-slider', 'value'), + prevent_initial_call=True + ) + def submit_coexpression_input(coexpression_submit_n_clicks, homepage_is_submitted, + implicated_gene_ids, submitted_addl_genes, + submitted_network, submitted_algo, submitted_slider_marks, submitted_slider_value): + if homepage_is_submitted and coexpression_submit_n_clicks >= 1: + paramater_module_value = Submitted_parameter_module( + submitted_slider_marks, submitted_slider_value, '', 'circle', 'tab-0')._asdict() + + submitted_parameter_module = { + submitted_algo: paramater_module_value} + + if submitted_addl_genes: + submitted_addl_genes = submitted_addl_genes.strip() + else: + submitted_addl_genes = '' + + list_addl_genes = list( + filter(None, [gene.strip() for gene in submitted_addl_genes.split(';')])) + + gene_ids = list(set.union( + set(implicated_gene_ids), set(list_addl_genes))) + + return True, submitted_addl_genes, gene_ids, submitted_network, submitted_algo, submitted_parameter_module + + raise PreventUpdate + + @app.callback( + Output('coexpression-results-container', 'style'), + Input('coexpression-is-submitted', 'data'), + ) + def display_coexpression_output(coexpression_is_submitted): + if coexpression_is_submitted: + return {'display': 'block'} + + else: + return {'display': 'none'} + + @app.callback( + Output('coexpression-parameter-slider', 'marks'), + Output('coexpression-parameter-slider', 'value'), + Input('coexpression-clustering-algo', 'value'), + State('coexpression-parameter-module-saved-input', 'data') + ) + def set_parameter_slider(algo, parameter_module): + if parameter_module and algo in parameter_module: + return parameter_module[algo]['param_slider_marks'], parameter_module[algo]['param_slider_value'] + + return get_parameters_for_algo(algo), module_detection_algos[algo].default_param * module_detection_algos[algo].multiplier + + @app.callback( + Output('coexpression-module-graph', 'elements'), + Output('coexpression-module-graph', 'layout'), + Output('coexpression-module-graph', 'style', allow_duplicate=True), + Output('coexpression-graph-container', 'style'), + + Input('coexpression-combined-genes', 'data'), + + Input('coexpression-submitted-network', 'data'), + Input('coexpression-submitted-clustering-algo', 'data'), + State('coexpression-is-submitted', 'data'), + State('coexpression-submitted-parameter-module', 'data'), + + prevent_initial_call=True + ) + def hide_table_graph(combined_gene_ids, submitted_network, submitted_algo, coexpression_is_submitted, submitted_parameter_module): + if coexpression_is_submitted: + if submitted_algo and submitted_algo in submitted_parameter_module: + parameters = submitted_parameter_module[submitted_algo]['param_slider_value'] + layout = submitted_parameter_module[submitted_algo]['layout'] + + return load_module_graph( + combined_gene_ids, None, submitted_network, submitted_algo, parameters, layout) + ({'visibility': 'hidden'}, ) + + raise PreventUpdate + + @app.callback( + Output('coexpression-table-container', 'style', allow_duplicate=True), + Input('coexpression-submit', 'n_clicks'), + + prevent_initial_call=True + ) + def hide_table(*_): + return {'visibility': 'hidden'} + + @app.callback( + Output('coexpression-module-graph', 'style', allow_duplicate=True), + Input('coexpression-modules', 'value'), + + prevent_initial_call=True + ) + def hide_graph(*_): + return {'visibility': 'hidden'} + + @app.callback( + Output('coexpression-modules', 'options'), + Output('coexpression-modules', 'value'), + Output('coexpression-results-module-tabs-container', 'style'), + Output('coexpression-module-stats', 'children'), + + State('homepage-genomic-intervals-submitted-input', 'data'), + + Input('coexpression-combined-genes', 'data'), + Input('coexpression-submitted-addl-genes', 'data'), + + Input('coexpression-submitted-network', 'data'), + Input('coexpression-submitted-clustering-algo', 'data'), + State('homepage-is-submitted', 'data'), + State('coexpression-submitted-parameter-module', 'data'), + State('coexpression-is-submitted', 'data') + ) + def perform_module_enrichment(genomic_intervals, combined_gene_ids, submitted_addl_genes, + submitted_network, submitted_algo, homepage_is_submitted, submitted_parameter_module, coexpression_is_submitted): + if homepage_is_submitted: + if coexpression_is_submitted: + if submitted_algo and submitted_algo in submitted_parameter_module: + parameters = submitted_parameter_module[submitted_algo]['param_slider_value'] + + enriched_modules = do_module_enrichment_analysis( + combined_gene_ids, genomic_intervals, submitted_addl_genes, submitted_network, submitted_algo, parameters) + + # Display statistics + num_enriched_modules = len(enriched_modules) + total_num_modules = count_modules( + submitted_network, submitted_algo, parameters) + stats = f'{num_enriched_modules} out of {total_num_modules} ' + if total_num_modules == 1: + stats += 'module ' + else: + stats += 'modules ' + + if num_enriched_modules == 1: + stats += 'was found to be enriched (adjusted p-value < 0.05).' + else: + stats += 'were found to be enriched (adjusted p-value < 0.05).' + + first_module = None + if enriched_modules: + first_module = enriched_modules[0] + else: + return enriched_modules, first_module, {'display': 'none'}, stats + + if submitted_parameter_module and submitted_algo in submitted_parameter_module: + if submitted_parameter_module[submitted_algo]['param_module']: + first_module = submitted_parameter_module[submitted_algo]['param_module'] + + return enriched_modules, first_module, {'display': 'block'}, stats + + raise PreventUpdate + + @app.callback( + Output('coexpression-pathways', 'data'), + Output('coexpression-pathways', 'columns'), + Output('coexpression-graph-stats', 'children'), + Output('coexpression-table-stats', 'children'), + + Output('coexpression-table-container', 'style'), + + Input('coexpression-combined-genes', 'data'), + Input('coexpression-submitted-network', 'data'), + Input('coexpression-submitted-clustering-algo', 'data'), + Input('coexpression-modules-pathway', 'active_tab'), + Input('coexpression-modules', 'value'), + State('coexpression-submitted-parameter-module', 'data'), + State('coexpression-is-submitted', 'data') + ) + def display_pathways(combined_gene_ids, + submitted_network, submitted_algo, active_tab, module, submitted_parameter_module, coexpression_is_submitted): + if coexpression_is_submitted: + if submitted_network and submitted_algo and submitted_algo in submitted_parameter_module: + parameters = submitted_parameter_module[submitted_algo]['param_slider_value'] + + try: + module_idx = module.split(' ')[1] + table, _ = convert_to_df( + active_tab, module_idx, submitted_network, submitted_algo, parameters) + except Exception: + table, _ = convert_to_df( + active_tab, None, submitted_network, submitted_algo, parameters) + + columns = [{'id': x, 'name': x, 'presentation': 'markdown'} + for x in table.columns] + + num_enriched = get_num_unique_entries(table, 'ID') + if num_enriched == 1: + stats = f'This module is enriched in {num_enriched} {get_noun_for_active_tab(active_tab).singular}.' + else: + stats = f'This module is enriched in {num_enriched} {get_noun_for_active_tab(active_tab).plural}.' + + graph_stats = 'The selected module has ' + try: + total_num_genes, num_combined_gene_ids = count_genes_in_module( + combined_gene_ids, int(module_idx), submitted_network, submitted_algo, parameters) + except UnboundLocalError: + total_num_genes, num_combined_gene_ids = 0, 0 + + if total_num_genes == 1: + graph_stats += f'{total_num_genes} gene, of which {num_combined_gene_ids} ' + else: + graph_stats += f'{total_num_genes} genes, of which {num_combined_gene_ids} ' + + if num_combined_gene_ids == 1: + graph_stats += 'is implicated by your GWAS/QTL or part of the gene list you manually entered.' + else: + graph_stats += 'are implicated by your GWAS/QTL or part of the gene list you manually entered.' + + if total_num_genes == 0: + return table.to_dict('records'), columns, graph_stats, stats, {'display': 'none'} + else: + return table.to_dict('records'), columns, graph_stats, stats, {'visibility': 'visible'} + + raise PreventUpdate + + @app.callback( + Output('coexpression-module-graph', 'elements', allow_duplicate=True), + Output('coexpression-module-graph', 'layout', allow_duplicate=True), + Output('coexpression-module-graph', 'style', allow_duplicate=True), + Output('coexpression-graph-container', 'style', allow_duplicate=True), + Output('coexpression-extra-bottom-div', 'style', allow_duplicate=True), + + Input('coexpression-combined-genes', 'data'), + Input('coexpression-modules', 'value'), + + State('coexpression-submitted-network', 'data'), + State('coexpression-submitted-clustering-algo', 'data'), + State('coexpression-submitted-parameter-module', 'data'), + + Input('coexpression-graph-layout', 'value'), + State('coexpression-is-submitted', 'data'), + + State('coexpression-modules', 'options'), + + Input('coexpression-reset-graph', 'n_clicks'), + + prevent_initial_call=True + ) + def display_table_graph(combined_gene_ids, module, submitted_network, submitted_algo, submitted_parameter_module, + layout, coexpression_is_submitted, modules, *_): + if coexpression_is_submitted: + if submitted_network and submitted_algo and submitted_algo in submitted_parameter_module: + parameters = submitted_parameter_module[submitted_algo]['param_slider_value'] + + if not modules: + module_graph = load_module_graph( + combined_gene_ids, None, submitted_network, submitted_algo, parameters, layout) + else: + module_graph = load_module_graph( + combined_gene_ids, module, submitted_network, submitted_algo, parameters, layout) + + # No enriched modules + if not modules: + return module_graph + ({'display': 'none'}, {'height': '0em'}) + + return module_graph + ({'visibility': 'visible', 'width': '100%', + 'height': '100vh'}, {'height': '1.5em'}) + + raise PreventUpdate + + @app.callback( + Output('coexpression-addl-genes-saved-input', + 'data', allow_duplicate=True), + Output('coexpression-network-saved-input', + 'data', allow_duplicate=True), + Output('coexpression-clustering-algo-saved-input', + 'data', allow_duplicate=True), + Output('coexpression-parameter-module-saved-input', + 'data', allow_duplicate=True), + + State('coexpression-addl-genes', 'value'), + Input('coexpression-network', 'value'), + Input('coexpression-clustering-algo', 'value'), + Input('coexpression-parameter-slider', 'value'), + State('coexpression-parameter-slider', 'marks'), + State('homepage-is-submitted', 'data'), + State('coexpression-parameter-module-saved-input', 'data'), + prevent_initial_call='True' + ) + def set_input_coexpression_session_state(addl_genes, network, algo, parameter_value, parameter_mark, homepage_is_submitted, input_parameter_module): + if homepage_is_submitted: + input_paramater_module_value = Input_parameter_module( + parameter_mark, parameter_value)._asdict() + + if input_parameter_module: + input_parameter_module[algo] = input_paramater_module_value + + else: + input_parameter_module = {algo: input_paramater_module_value} + + return addl_genes, network, algo, input_parameter_module + + raise PreventUpdate + + @app.callback( + Output('coexpression-submitted-parameter-module', + 'data', allow_duplicate=True), + + Input('coexpression-modules', 'value'), + Input('coexpression-graph-layout', 'value'), + Input('coexpression-modules-pathway', 'active_tab'), + + State('coexpression-submitted-network', 'data'), + State('coexpression-submitted-clustering-algo', 'data'), + State('homepage-is-submitted', 'data'), + State('coexpression-submitted-parameter-module', 'data'), + prevent_initial_call=True + ) + def set_submitted_coexpression_session_state(module, layout, active_tab, submitted_network, submitted_algo, homepage_is_submitted, submitted_parameter_module): + if homepage_is_submitted: + if submitted_network and submitted_parameter_module and submitted_algo in submitted_parameter_module: + submitted_parameter_module[submitted_algo]['param_module'] = module + submitted_parameter_module[submitted_algo]['layout'] = layout + submitted_parameter_module[submitted_algo]['pathway_active_tab'] = active_tab + + return submitted_parameter_module + + raise PreventUpdate + + @app.callback( + Output('coexpression-addl-genes', 'value'), + + State('homepage-is-submitted', 'data'), + State('coexpression-addl-genes-saved-input', 'data'), + + Input('homepage-genomic-intervals-submitted-input', 'data') + ) + def display_submitted_addl_genes(homepage_is_submitted, addl_genes, *_): + if homepage_is_submitted: + if not addl_genes: + return '' + + return addl_genes + + raise PreventUpdate + + @app.callback( + Output('coexpression-network', 'value'), + + State('homepage-is-submitted', 'data'), + State('coexpression-network-saved-input', 'data'), + + Input('homepage-genomic-intervals-submitted-input', 'data') + ) + def display_selected_coexpression_network(homepage_is_submitted, network, *_): + if homepage_is_submitted: + if not network: + return 'OS-CX' + + return network + + raise PreventUpdate + + @app.callback( + Output('coexpression-clustering-algo', 'value'), + + State('homepage-is-submitted', 'data'), + State('coexpression-clustering-algo-saved-input', 'data'), + + Input('homepage-genomic-intervals-submitted-input', 'data') + ) + def get_selected_clustering_algo(homepage_is_submitted, algo, *_): + if homepage_is_submitted: + if not algo: + return 'clusterone' + + return algo + + raise PreventUpdate + + @app.callback( + Output('coexpression-graph-layout', 'value'), + Output('coexpression-modules-pathway', 'active_tab'), + + Input('coexpression-submitted-network', 'data'), + Input('coexpression-submitted-clustering-algo', 'data'), + State('coexpression-is-submitted', 'data'), + State('coexpression-submitted-parameter-module', 'data') + ) + def display_selected_graph_layout(submitted_network, submitted_algo, coexpression_is_submitted, submitted_parameter_module): + if coexpression_is_submitted: + if submitted_network and submitted_algo and submitted_algo in submitted_parameter_module: + layout = 'circle' + if submitted_parameter_module[submitted_algo]['layout']: + layout = submitted_parameter_module[submitted_algo]['layout'] + + active_tab = 'tab-0' + if submitted_parameter_module[submitted_algo]['pathway_active_tab']: + active_tab = submitted_parameter_module[submitted_algo]['pathway_active_tab'] + + return layout, active_tab + + raise PreventUpdate + + @app.callback( + Output('coexpression-input', 'children'), + Input('coexpression-is-submitted', 'data'), + State('coexpression-addl-genes', 'value'), + State('coexpression-network', 'value'), + State('coexpression-clustering-algo', 'value'), + State('coexpression-parameter-slider', 'value') + ) + def display_coexpression_submitted_input(coexpression_is_submitted, genes, network, algo, parameters): + if coexpression_is_submitted: + if not genes: + genes = 'None' + else: + genes = '; '.join( + list(filter(None, [gene.strip() for gene in genes.split(';')]))) + + return [html.B('Additional Genes: '), genes, + html.Br(), + html.B('Selected Co-Expression Network: '), get_user_facing_network( + network), + html.Br(), + html.B('Selected Module Detection Algorithm: '), get_user_facing_algo( + algo), + html.Br(), + html.B('Selected Algorithm Parameter: '), get_user_facing_parameter(algo, parameters)] + + raise PreventUpdate + + @app.callback( + Output('coexpression-clustering-algo-modal', 'is_open'), + Input('coexpression-clustering-algo-tooltip', 'n_clicks') + ) + def open_modals(tooltip_n_clicks): + if tooltip_n_clicks > 0: + return True + + @app.callback( + Output('coexpression-pathways', 'filter_query'), + Input('coexpression-modules-pathway', 'active_tab'), + Input('coexpression-reset-table', 'n_clicks') + ) + def reset_table_filters(*_): + return '' + + @app.callback( + Output('coexpression-download-df-to-csv', 'data'), + Input('coexpression-export-table', 'n_clicks'), + State('coexpression-pathways', 'data'), + State('homepage-genomic-intervals-submitted-input', 'data') + ) + def download_coexpression_table_to_csv(download_n_clicks, coexpression_df, genomic_intervals): + if download_n_clicks >= 1: + df = pd.DataFrame(coexpression_df) + return dcc.send_data_frame(df.to_csv, f'[{genomic_intervals}] Co-Expression Network Analysis Table.csv', index=False) + + raise PreventUpdate + + @app.callback( + Output('coexpression-download-graph-to-json', 'data'), + Input('coexpression-export-graph', 'n_clicks'), + State('coexpression-module-graph', 'elements'), + State('homepage-genomic-intervals-submitted-input', 'data') + ) + def download_coexpression_table_to_csv(download_n_clicks, coexpression_dict, genomic_intervals): + if download_n_clicks >= 1: + return dict(content='Hello world!', filename=f'[{genomic_intervals}] Co-Expression Network Analysis Graph.txt') + + raise PreventUpdate diff --git a/callbacks/coexpression/util.py b/callbacks/coexpression/util.py index 83f94608..8fe47a43 100644 --- a/callbacks/coexpression/util.py +++ b/callbacks/coexpression/util.py @@ -1,644 +1,644 @@ -from ..constants import Constants -from ..file_util import * -from ..general_util import * -from ..links_util import * -import os -import pickle - -import pandas as pd -import networkx as nx -from scipy.stats import fisher_exact, false_discovery_control - -from collections import namedtuple - -const = Constants() - -# Settings for the module detection algorithms: -# - multiplier: Value multiplied to the parameter to get the name of the directory -# For example, results of running clusterone at param=0.3 are saved in 30 -# - default_param: Default parameter of the module detection algorithm -# - low: User-facing display for the lowest parameter -# - high: User-facing display for the highest parameter - -Module_detection_algo = namedtuple('Module_detection_algo', [ - 'multiplier', 'default_param', 'low', 'high']) -module_detection_algos = { - 'clusterone': Module_detection_algo( - 100, 0.3, '1 (Looser Modules)', '4 (Denser Modules)'), - 'coach': Module_detection_algo( - 1000, 0.225, '1 (Looser Modules)', '4 (Denser Modules)'), - 'demon': Module_detection_algo( - 100, 0.25, '1 (Looser Modules)', '4 (Denser Modules)'), - 'fox': Module_detection_algo( - 100, 0.05, '1 (Looser Modules)', '4 (Denser Modules)'), -} - - -MODULE_DETECTION_ALGOS_VALUE_LABEL = [ - {'value': 'clusterone', 'label': 'ClusterONE', - 'label_id': 'clusterone'}, - {'value': 'coach', 'label': 'COACH', 'label_id': 'coach'}, - {'value': 'demon', 'label': 'DEMON', 'label_id': 'demon'}, - {'value': 'fox', 'label': 'FOX', 'label_id': 'fox'} -] - -COEXPRESSION_NETWORKS_VALUE_LABEL = [ - {'value': 'OS-CX', 'label': 'RiceNet v2', 'label_id': 'os-cx'}, - {'value': 'RCRN', - 'label': 'Rice Combined Mutual Ranked Network (RCRN)', 'label_id': 'rcrn'}, -] - -Enrichment_tab = namedtuple('Enrichment_tab', ['enrichment', 'path']) -enrichment_tabs = [Enrichment_tab('Gene Ontology', 'ontology_enrichment/go'), - Enrichment_tab('Trait Ontology', 'ontology_enrichment/to'), - Enrichment_tab('Plant Ontology', 'ontology_enrichment/po'), - Enrichment_tab('Pathways (Over-Representation)', - 'pathway_enrichment/ora'), - Enrichment_tab('Pathway-Express', 'pathway_enrichment/pe'), - Enrichment_tab('SPIA', 'pathway_enrichment/spia')] - - -def get_user_facing_parameter(algo, parameter, network='OS-CX'): - parameters = sorted( - map(int, os.listdir(f'{const.NETWORK_MODULES}/{network}/MSU/{algo}'))) - - return parameters.index(parameter) + 1 - - -def get_user_facing_algo(algo): - for entry in MODULE_DETECTION_ALGOS_VALUE_LABEL: - if entry['value'] == algo: - return entry['label'] - - -def get_user_facing_network(network): - for entry in COEXPRESSION_NETWORKS_VALUE_LABEL: - if entry['value'] == network: - return entry['label'] - - -def get_parameters_for_algo(algo, network='OS-CX'): - """ - Returns the user-facing parameters for the module detection algorithms - - Parameters: - - algo: Module detection algorithm - - network: Any of the coexpression networks supported by the app - - Returns: - - User-facing parameters for the module detection algorithms - """ - param_dict = {} - parameters = sorted( - map(int, os.listdir(f'{const.NETWORK_MODULES}/{network}/MSU/{algo}'))) - - # Display the user-facing parameters for the module detection algorithms - for idx, parameter in enumerate(parameters): - if idx == 0: - param_dict[int(parameter)] = module_detection_algos[algo].low - elif idx == len(parameters) - 1: - param_dict[int(parameter)] = module_detection_algos[algo].high - else: - param_dict[int(parameter)] = str(idx + 1) - - return param_dict - -# ================================================= -# Utility functions for module enrichment analysis -# ================================================= - - -def create_module_enrichment_results_dir(genomic_intervals, addl_genes, network, algo, parameters): - """ - Writes the accessions of the GWAS-implicated genes to a file - - Parameters: - - genes: Accessions of the genes implicated by GWAS - - genomic_intervals: Genomic interval entered by the user - - network: Coexpression network - - algo: Module detection algorithm - - parameters: Parameter at which module detection algorithm is run - - Returns: - - Parent directory of the file to which the accessions of the GWAS-implicated genes are written - """ - if addl_genes: - temp_output_folder_dir = get_path_to_temp( - genomic_intervals, const.TEMP_COEXPRESSION, f'{shorten_name(addl_genes)}/{network}/{algo}/{parameters}') - else: - temp_output_folder_dir = get_path_to_temp( - genomic_intervals, const.TEMP_COEXPRESSION, f'{network}/{algo}/{parameters}') - - if not path_exists(temp_output_folder_dir): - make_dir(temp_output_folder_dir) - - return temp_output_folder_dir - - -def fetch_enriched_modules(output_dir): - """ - Fetches the enriched modules from the output file of the module enrichment analysis - - Parameters: - - output_dir: Parent directory of the output file of the module enrichment analysis - - Returns: - - Enriched modules (i.e., their respectives indices and adjust p-values) - """ - modules = [] - with open(f'{output_dir}/enriched_modules.tsv') as modules_file: - for line in modules_file: - line = line.rstrip().split('\t') - idx = line[0] - p_value = float(line[1]) - - modules.append( - f'Module {idx} (Adj. p-value = {display_in_sci_notation(p_value)})') - - return modules - - -def do_module_enrichment_analysis(implicated_gene_ids, genomic_intervals, addl_genes, network, algo, parameters): - """ - Determine which modules are enriched given the set of GWAS-implicated genes - - Parameters: - - implicated_gene_ids: Accessions of the genes implicated by GWAS - - genomic_intervals: Genomic interval entered by the user - - network: Coexpression network - - algo: Module detection algorithm - - parameters: Parameter at which module detection algorithm is run - - Returns: - - Enriched modules (i.e., their respectives indices and adjust p-values) - """ - implicated_genes = set(implicated_gene_ids) - INPUT_GENES_DIR = create_module_enrichment_results_dir( - genomic_intervals, addl_genes, network, algo, parameters) - ENRICHED_MODULES_PATH = f'{INPUT_GENES_DIR}/enriched_modules.tsv' - - if not path_exists(ENRICHED_MODULES_PATH): - ENRICHED_MODULES_PATH_WITH_TIMESTAMP = append_timestamp_to_filename( - ENRICHED_MODULES_PATH) - MODULES_PATH = f'{const.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv' - - # ==================================================================================== - # This replicates the logic of running the universal enrichment function `enricher()` - # provided by clusterProfiler - # ==================================================================================== - - with open(MODULES_PATH) as modules_file, open(ENRICHED_MODULES_PATH_WITH_TIMESTAMP, 'w') as enriched_modules_file: - modules = [] - background_genes = set() - for idx, line in enumerate(modules_file): - module_genes = set(line.strip().split('\t')) - background_genes = background_genes.union(module_genes) - if implicated_genes.intersection(module_genes): - modules.append(idx) - - p_values_indices = [] - p_values = [] - modules_file.seek(0) - for idx, line in enumerate(modules_file): - if idx in modules: - module = line.strip().split('\t') - module_genes = set(module) - table = construct_contigency_table( - background_genes, implicated_genes, module_genes) - - p_values.append(fisher_exact( - table, alternative='greater').pvalue) - - # Add 1 since user-facing module number is one-based - p_values_indices.append(idx + 1) - - adj_p_values = false_discovery_control(p_values, method='bh') - significant_adj_p_values = [(p_values_indices[idx], adj_p_value) for idx, adj_p_value in enumerate( - adj_p_values) if adj_p_value < const.P_VALUE_CUTOFF] - significant_adj_p_values.sort(key=lambda x: x[1]) - significant_adj_p_values = [ - f'{ID}\t{adj_p_value}' for ID, adj_p_value in significant_adj_p_values] - - enriched_modules_file.write('\n'.join(significant_adj_p_values)) - - try: - os.replace(ENRICHED_MODULES_PATH_WITH_TIMESTAMP, - ENRICHED_MODULES_PATH) - except: - pass - - return fetch_enriched_modules(INPUT_GENES_DIR) - - -def construct_contigency_table(background_genes, implicated_genes, module_genes): - not_in_implicated = background_genes.difference(implicated_genes) - not_in_module = background_genes.difference(module_genes) - - in_implicated_in_module = len(implicated_genes.intersection(module_genes)) - in_implicated_not_in_module = len( - implicated_genes.intersection(not_in_module)) - - not_in_implicated_in_module = len( - not_in_implicated.intersection(module_genes)) - not_in_implicated_not_in_module = len( - not_in_implicated.intersection(not_in_module)) - - table = [[in_implicated_in_module, not_in_implicated_in_module], - [in_implicated_not_in_module, not_in_implicated_not_in_module]] - - return table - - -# =============================================================================================== -# Utility functions for the display of the tables showing the results of the enrichment analysis -# =============================================================================================== - - -def convert_transcript_to_msu_id(transcript_ids_str, network): - """ - Converts given KEGG transcript IDs to their respective MSU accessions. - - Parameters: - - transcript_ids_str: KEGG transcript IDs - - network: Coexpression network - - Returns: - - Equivalent MSU accessions of the KEGG transcript IDs - """ - with open(f'{const.GENE_ID_MAPPING}/{network}/transcript-to-msu-id.pickle', 'rb') as f: - mapping_dict = pickle.load(f) - - output_str = '' - transcript_ids = transcript_ids_str.split('\n') - for transcript_id in transcript_ids: - for msu_id in mapping_dict[transcript_id]: - output_str += f'{msu_id}\n({transcript_id})\n\n' - - # Remove trailing newline characters - return output_str[:-2] - - -def get_genes_in_module(module_idx, network, algo, parameters): - with open(f'{const.NETWORK_MODULES}/{network}/transcript/{algo}/{parameters}/{algo}-module-list.tsv') as f: - for idx, module in enumerate(f): - if idx + 1 == int(module_idx): - return set(module.split('\t')) - - -def get_genes_in_pathway(pathway_id, network): - with open(f'{const.ENRICHMENT_ANALYSIS}/{network}/{const.KEGG_DOSA_GENESET}', 'rb') as f: - genes_in_pathway = pickle.load(f) - - return genes_in_pathway[pathway_id] - - -def get_genes_in_module_and_pathway(pathway_id, module_idx, network, algo, parameters): - return '\n'.join(list(get_genes_in_pathway(pathway_id, network).intersection( - get_genes_in_module(module_idx, network, algo, parameters)))) - - -def get_kegg_pathway_name(pathway_id, network): - with open(f'{const.ENRICHMENT_ANALYSIS}/{network}/{const.KEGG_DOSA_PATHWAY_NAMES}') as pathways: - for line in pathways: - line = line.split('\t') - if line[0].rstrip() == pathway_id: - return line[1].strip() - - -def remove_rap_db_info_in_pathway_name(pathway_name): - return pathway_name[:-len(' - Oryza sativa japonica (Japanese rice) (RAPDB)')] - -# ======================================================================================= -# Functions for the display of the tables showing the results of the enrichment analysis -# ======================================================================================= - - -def convert_to_df_go(result): - cols = ['ID', 'Gene Ontology Term', 'Gene Ratio', - 'BG Ratio', 'p-value', 'Adj. p-value', 'Genes'] - - if result.empty: - return create_empty_df_with_cols(cols) - - # Prettify display of genes - result['Genes'] = result['Genes'].str.split('/').str.join('\n') - - result['ID'] = get_go_link(result, 'ID') - - result = result.sort_values('Adj. p-value') - - display_cols_in_sci_notation( - result, [col for col in cols if 'p-value' in col]) - - return result[cols].dropna() - - -def convert_to_df_to(result): - cols = ['ID', 'Trait Ontology Term', 'Gene Ratio', - 'BG Ratio', 'p-value', 'Adj. p-value', 'Genes'] - - if result.empty: - return create_empty_df_with_cols(cols) - - # Prettify display of genes - result['Genes'] = result['Genes'].str.split('/').str.join('\n') - - result['ID'] = get_to_po_link(result, 'ID') - - result = result.sort_values('Adj. p-value') - - display_cols_in_sci_notation( - result, [col for col in cols if 'p-value' in col]) - - return result[cols].dropna() - - -def convert_to_df_po(result): - cols = ['ID', 'Plant Ontology Term', 'Gene Ratio', - 'BG Ratio', 'p-value', 'Adj. p-value', 'Genes'] - - if result.empty: - return create_empty_df_with_cols(cols) - - # Prettify display of genes - result['Genes'] = result['Genes'].str.split('/').str.join('\n') - - result['ID'] = get_to_po_link(result, 'ID') - - result = result.sort_values('Adj. p-value') - - display_cols_in_sci_notation( - result, [col for col in cols if 'p-value' in col]) - - return result[cols].dropna() - - -def convert_to_df_ora(result, network): - cols = ['ID', 'KEGG Pathway', 'Gene Ratio', - 'BG Ratio', 'p-value', 'Adj. p-value', 'Genes'] - - if result.empty: - return create_empty_df_with_cols(cols) - - result['KEGG Pathway'] = result['KEGG Pathway'].apply( - remove_rap_db_info_in_pathway_name) - - # Construct link before appending the MSU accession - result['ID'] = get_kegg_link(result, 'ID', 'Genes') - - # Prettify display of genes and convert to MSU accessions - result['Genes'] = result['Genes'].str.split( - '/').str.join('\n') - result['Genes'] = result.apply( - lambda x: convert_transcript_to_msu_id(x['Genes'], network), axis=1) - - result = result.sort_values('Adj. p-value') - - display_cols_in_sci_notation( - result, [col for col in cols if 'p-value' in col]) - - return result[cols].dropna() - - -def convert_to_df_pe(result, module_idx, network, algo, parameters): - cols = ['ID', 'KEGG Pathway', 'ORA p-value', 'Perturbation p-value', 'Combined p-value', - 'Adj. ORA p-value', 'Adj. Perturbation p-value', - 'Adj. Combined p-value', 'Genes'] - - if result.empty: - return create_empty_df_with_cols(cols) - - result = result.loc[result['Adj. Combined p-value'] < const.P_VALUE_CUTOFF] - - # IMPORTANT: Do not change ordering of instructions - - # Prettify display of ID - result['ID'] = result['ID'].str[len('path:'):] - - result['KEGG Pathway'] = result.apply( - lambda x: get_kegg_pathway_name(x['ID'], network), axis=1) - result['KEGG Pathway'] = result['KEGG Pathway'].apply( - remove_rap_db_info_in_pathway_name) - - result['Genes'] = result.apply(lambda x: get_genes_in_module_and_pathway( - x['ID'], module_idx, network, algo, parameters), axis=1) - - # Construct link before appending the MSU accession - result['ID'] = get_kegg_link(result, 'ID', 'Genes') - - result['Genes'] = result.apply( - lambda x: convert_transcript_to_msu_id(x['Genes'], network), axis=1) - - result = result.sort_values('Adj. Combined p-value') - - display_cols_in_sci_notation( - result, [col for col in cols if 'p-value' in col]) - - return result[cols].dropna() - - -def convert_to_df_spia(result, network): - cols = ['ID', 'KEGG Pathway', 'ORA p-value', 'Total Acc. Perturbation', 'Perturbation p-value', 'Combined p-value', - 'Adj. Combined p-value', 'Pathway Status', 'Genes'] - - if result.empty: - return create_empty_df_with_cols(cols) - - result = result.loc[result['Adj. Combined p-value'] < const.P_VALUE_CUTOFF] - - # Prettify display of ID - result['ID'] = 'dosa' + result['ID'] - result['Total Acc. Perturbation'] = result['tA'] - - # Prettify display of genes and convert to MSU accessions - result['Genes'] = result['View on KEGG'].apply( - get_genes_from_kegg_link) - - # Construct link before appending the MSU accession - result['ID'] = get_kegg_link(result, 'ID', 'Genes') - - result['Genes'] = result.apply( - lambda x: convert_transcript_to_msu_id(x['Genes'], network), axis=1) - - result = result.sort_values('Adj. Combined p-value') - - display_cols_in_sci_notation( - result, [col for col in cols if 'p-value' in col]) - - return result[cols].dropna() - - -def convert_to_df(active_tab, module_idx, network, algo, parameters): - """ - Returns the results of ontology and pathway enrichment analysis as a data frame - - Parameters: - - active_tab: ID of the tab corresponding to the selected enrichment analysis - - module_idx: Index of the selected module - - network: Coexpression network - - algo: Module detection algorithm - - parameters: Parameter at which module detection algorithm is run - - Returns: - - Data frame containing the results of ontology and pathway enrichment analysis - - True if the data frame is empty; False, otherwise - """ - dir = enrichment_tabs[get_tab_index(active_tab)].path - enrichment_type = dir.split('/')[-1] - - file = f'{const.ENRICHMENT_ANALYSIS}/{network}/output/{algo}/{parameters}/{dir}/results/{enrichment_type}-df-{module_idx}.tsv' - - columns = {'go': ['ID', 'Gene Ontology Term', 'Gene Ratio', - 'BG Ratio', 'p-value', 'Adj. p-value', 'q-value', 'Genes', 'Count'], - 'to': ['ID', 'Trait Ontology Term', 'Gene Ratio', - 'BG Ratio', 'p-value', 'Adj. p-value', 'q-value', 'Genes', 'Count'], - 'po': ['ID', 'Plant Ontology Term', 'Gene Ratio', - 'BG Ratio', 'p-value', 'Adj. p-value', 'q-value', 'Genes', 'Count'], - 'ora': ['ID', 'KEGG Pathway', 'Gene Ratio', - 'BG Ratio', 'p-value', 'Adj. p-value', 'q-value', 'Genes', 'Count'], - 'pe': ['ID', 'totalAcc', 'totalPert', 'totalAccNorm', 'totalPertNorm', - 'Perturbation p-value', 'pAcc', 'ORA p-value', 'Combined p-value', - 'Adj. Perturbation p-value', 'Adj. Accumulation p-value', - 'Adj. ORA p-value', 'Adj. Combined p-value'], - 'spia': ['KEGG Pathway', 'ID', 'pSize', 'NDE', 'ORA p-value', 'tA', - 'Perturbation p-value', 'Combined p-value', 'Adj. Combined p-value', - 'Adj. Combined p-value (Bonferroni)', 'Pathway Status', 'View on KEGG']} - - try: - result = pd.read_csv(file, delimiter='\t', - names=columns[enrichment_type], skiprows=1) - - # SPIA is a special case - if enrichment_type.lower() == 'spia': - # Add dtype argument to preserve leading 0 in KEGG pathway ID - result = pd.read_csv(file, delimiter='\t', - names=columns[enrichment_type], skiprows=1, dtype={'ID': object}) - - empty = result.empty - except: - result = pd.DataFrame() - empty = True - - # Return results data frame and whether it is empty - if enrichment_type == 'go': - return convert_to_df_go(result), empty - - elif enrichment_type == 'to': - return convert_to_df_to(result), empty - - elif enrichment_type == 'po': - return convert_to_df_po(result), empty - - elif enrichment_type == 'ora': - return convert_to_df_ora(result, network), empty - - elif enrichment_type == 'pe': - return convert_to_df_pe(result, module_idx, network, algo, parameters), empty - - elif enrichment_type == 'spia': - return convert_to_df_spia(result, network), empty - - -def convert_module_to_edge_list(module, network_file, output_dir, filename): - module = set(module) - selected_nodes = set() - with open(network_file) as network, open(f'{output_dir}/{filename}', 'w') as output: - for edge in network: - edge = edge.rstrip() - nodes = edge.split('\t') - - if nodes[0] in module and nodes[1] in module: - selected_nodes.add(nodes[0]) - selected_nodes.add(nodes[1]) - output.write(f'{nodes[0]}\t{nodes[1]}\n') - - assert len(selected_nodes - module) == 0 - - -def convert_modules_to_edgelist(network_file, module_file, module_index, output_dir): - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - with open(module_file) as modules: - for idx, module in enumerate(modules): - if idx == module_index - 1: - module = module.rstrip() - module = module.split('\t') - filename = f'module-{idx + 1}.tsv' - convert_module_to_edge_list( - module, network_file, output_dir, filename) - - break - - -def load_module_graph(implicated_gene_ids, module, network, algo, parameters, layout): - """ - Displays the subgraph induced by the module - - Parameters: - - implicated_gene_ids: Accessions of the genes implicated by GWAS - - module: Gene module - - network: Coexpression network - - algo: Module detection algorithm - - parameters: Parameter at which module detection algorithm is run - - layout: Layout of the graph display - - Returns: - - Elements (nodes and edges) of the graph - - Dictionary storing the layout of the graph - - Dictionary storing the visibility, width, and height of the graph - """ - try: - # Ignore the word "Module" at the start - module_idx = int(module.split(' ')[1]) - OUTPUT_DIR = f'{const.TEMP}/{network}/{algo}/modules/{parameters}' - coexpress_nw = f'{OUTPUT_DIR}/module-{module_idx}.tsv' - - if not path_exists(coexpress_nw): - NETWORK_FILE = f'{const.NETWORKS}/{network}.txt' - MODULE_FILE = f'{const.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv' - - convert_modules_to_edgelist( - NETWORK_FILE, MODULE_FILE, module_idx, OUTPUT_DIR) - - G = nx.read_edgelist(coexpress_nw, data=(('coexpress', float))) - - # Highlight the GWAS-implicated genes - elements = nx.cytoscape_data(G)['elements'] - for node in elements['nodes']: - if node['data']['id'] in implicated_gene_ids: - node['classes'] = 'shaded' - - return elements, {'name': layout}, {'visibility': 'visible', 'width': '100%', 'height': '100vh'} - - # Triggered when there are no enriched modules - except: - return {}, {'name': layout}, {'display': 'none', 'width': '100%', 'height': '100vh'} - -# ==================================== -# Functions for displaying statistics -# ==================================== - - -def count_modules(network, algo, parameters): - with open(f'{const.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv') as f: - return len(f.readlines()) - - -Noun = namedtuple('Noun', ['singular', 'plural']) - - -def get_noun_for_active_tab(active_tab): - tab_idx = get_tab_index(active_tab) - if 0 <= tab_idx and tab_idx <= 2: - return Noun('ontology term', 'ontology terms') - else: - return Noun('pathway', 'pathways') - - -def count_genes_in_module(implicated_genes, module_idx, network, algo, parameters): - with open(f'{const.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv') as modules: - for idx, module in enumerate(modules): - if idx == module_idx - 1: - module_genes = module.strip().split('\t') - return len(module_genes), len(set.intersection(set(module_genes), set(implicated_genes))) +from ..constants import Constants +from ..file_util import * +from ..general_util import * +from ..links_util import * +import os +import pickle + +import pandas as pd +import networkx as nx +from scipy.stats import fisher_exact, false_discovery_control + +from collections import namedtuple + +const = Constants() + +# Settings for the module detection algorithms: +# - multiplier: Value multiplied to the parameter to get the name of the directory +# For example, results of running clusterone at param=0.3 are saved in 30 +# - default_param: Default parameter of the module detection algorithm +# - low: User-facing display for the lowest parameter +# - high: User-facing display for the highest parameter + +Module_detection_algo = namedtuple('Module_detection_algo', [ + 'multiplier', 'default_param', 'low', 'high']) +module_detection_algos = { + 'clusterone': Module_detection_algo( + 100, 0.3, '1 (Looser Modules)', '4 (Denser Modules)'), + 'coach': Module_detection_algo( + 1000, 0.225, '1 (Looser Modules)', '4 (Denser Modules)'), + 'demon': Module_detection_algo( + 100, 0.25, '1 (Looser Modules)', '4 (Denser Modules)'), + 'fox': Module_detection_algo( + 100, 0.05, '1 (Looser Modules)', '4 (Denser Modules)'), +} + + +MODULE_DETECTION_ALGOS_VALUE_LABEL = [ + {'value': 'clusterone', 'label': 'ClusterONE', + 'label_id': 'clusterone'}, + {'value': 'coach', 'label': 'COACH', 'label_id': 'coach'}, + {'value': 'demon', 'label': 'DEMON', 'label_id': 'demon'}, + {'value': 'fox', 'label': 'FOX', 'label_id': 'fox'} +] + +COEXPRESSION_NETWORKS_VALUE_LABEL = [ + {'value': 'OS-CX', 'label': 'RiceNet v2', 'label_id': 'os-cx'}, + {'value': 'RCRN', + 'label': 'Rice Combined Mutual Ranked Network (RCRN)', 'label_id': 'rcrn'}, +] + +Enrichment_tab = namedtuple('Enrichment_tab', ['enrichment', 'path']) +enrichment_tabs = [Enrichment_tab('Gene Ontology', 'ontology_enrichment/go'), + Enrichment_tab('Trait Ontology', 'ontology_enrichment/to'), + Enrichment_tab('Plant Ontology', 'ontology_enrichment/po'), + Enrichment_tab('Pathways (Over-Representation)', + 'pathway_enrichment/ora'), + Enrichment_tab('Pathway-Express', 'pathway_enrichment/pe'), + Enrichment_tab('SPIA', 'pathway_enrichment/spia')] + + +def get_user_facing_parameter(algo, parameter, network='OS-CX'): + parameters = sorted( + map(int, os.listdir(f'{const.NETWORK_MODULES}/{network}/MSU/{algo}'))) + + return parameters.index(parameter) + 1 + + +def get_user_facing_algo(algo): + for entry in MODULE_DETECTION_ALGOS_VALUE_LABEL: + if entry['value'] == algo: + return entry['label'] + + +def get_user_facing_network(network): + for entry in COEXPRESSION_NETWORKS_VALUE_LABEL: + if entry['value'] == network: + return entry['label'] + + +def get_parameters_for_algo(algo, network='OS-CX'): + """ + Returns the user-facing parameters for the module detection algorithms + + Parameters: + - algo: Module detection algorithm + - network: Any of the coexpression networks supported by the app + + Returns: + - User-facing parameters for the module detection algorithms + """ + param_dict = {} + parameters = sorted( + map(int, os.listdir(f'{const.NETWORK_MODULES}/{network}/MSU/{algo}'))) + + # Display the user-facing parameters for the module detection algorithms + for idx, parameter in enumerate(parameters): + if idx == 0: + param_dict[int(parameter)] = module_detection_algos[algo].low + elif idx == len(parameters) - 1: + param_dict[int(parameter)] = module_detection_algos[algo].high + else: + param_dict[int(parameter)] = str(idx + 1) + + return param_dict + +# ================================================= +# Utility functions for module enrichment analysis +# ================================================= + + +def create_module_enrichment_results_dir(genomic_intervals, addl_genes, network, algo, parameters): + """ + Writes the accessions of the GWAS-implicated genes to a file + + Parameters: + - genes: Accessions of the genes implicated by GWAS + - genomic_intervals: Genomic interval entered by the user + - network: Coexpression network + - algo: Module detection algorithm + - parameters: Parameter at which module detection algorithm is run + + Returns: + - Parent directory of the file to which the accessions of the GWAS-implicated genes are written + """ + if addl_genes: + temp_output_folder_dir = get_path_to_temp( + genomic_intervals, const.TEMP_COEXPRESSION, f'{shorten_name(addl_genes)}/{network}/{algo}/{parameters}') + else: + temp_output_folder_dir = get_path_to_temp( + genomic_intervals, const.TEMP_COEXPRESSION, f'{network}/{algo}/{parameters}') + + if not path_exists(temp_output_folder_dir): + make_dir(temp_output_folder_dir) + + return temp_output_folder_dir + + +def fetch_enriched_modules(output_dir): + """ + Fetches the enriched modules from the output file of the module enrichment analysis + + Parameters: + - output_dir: Parent directory of the output file of the module enrichment analysis + + Returns: + - Enriched modules (i.e., their respectives indices and adjust p-values) + """ + modules = [] + with open(f'{output_dir}/enriched_modules.tsv') as modules_file: + for line in modules_file: + line = line.rstrip().split('\t') + idx = line[0] + p_value = float(line[1]) + + modules.append( + f'Module {idx} (Adj. p-value = {display_in_sci_notation(p_value)})') + + return modules + + +def do_module_enrichment_analysis(implicated_gene_ids, genomic_intervals, addl_genes, network, algo, parameters): + """ + Determine which modules are enriched given the set of GWAS-implicated genes + + Parameters: + - implicated_gene_ids: Accessions of the genes implicated by GWAS + - genomic_intervals: Genomic interval entered by the user + - network: Coexpression network + - algo: Module detection algorithm + - parameters: Parameter at which module detection algorithm is run + + Returns: + - Enriched modules (i.e., their respectives indices and adjust p-values) + """ + implicated_genes = set(implicated_gene_ids) + INPUT_GENES_DIR = create_module_enrichment_results_dir( + genomic_intervals, addl_genes, network, algo, parameters) + ENRICHED_MODULES_PATH = f'{INPUT_GENES_DIR}/enriched_modules.tsv' + + if not path_exists(ENRICHED_MODULES_PATH): + ENRICHED_MODULES_PATH_WITH_TIMESTAMP = append_timestamp_to_filename( + ENRICHED_MODULES_PATH) + MODULES_PATH = f'{const.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv' + + # ==================================================================================== + # This replicates the logic of running the universal enrichment function `enricher()` + # provided by clusterProfiler + # ==================================================================================== + + with open(MODULES_PATH) as modules_file, open(ENRICHED_MODULES_PATH_WITH_TIMESTAMP, 'w') as enriched_modules_file: + modules = [] + background_genes = set() + for idx, line in enumerate(modules_file): + module_genes = set(line.strip().split('\t')) + background_genes = background_genes.union(module_genes) + if implicated_genes.intersection(module_genes): + modules.append(idx) + + p_values_indices = [] + p_values = [] + modules_file.seek(0) + for idx, line in enumerate(modules_file): + if idx in modules: + module = line.strip().split('\t') + module_genes = set(module) + table = construct_contigency_table( + background_genes, implicated_genes, module_genes) + + p_values.append(fisher_exact( + table, alternative='greater').pvalue) + + # Add 1 since user-facing module number is one-based + p_values_indices.append(idx + 1) + + adj_p_values = false_discovery_control(p_values, method='bh') + significant_adj_p_values = [(p_values_indices[idx], adj_p_value) for idx, adj_p_value in enumerate( + adj_p_values) if adj_p_value < const.P_VALUE_CUTOFF] + significant_adj_p_values.sort(key=lambda x: x[1]) + significant_adj_p_values = [ + f'{ID}\t{adj_p_value}' for ID, adj_p_value in significant_adj_p_values] + + enriched_modules_file.write('\n'.join(significant_adj_p_values)) + + try: + os.replace(ENRICHED_MODULES_PATH_WITH_TIMESTAMP, + ENRICHED_MODULES_PATH) + except: + pass + + return fetch_enriched_modules(INPUT_GENES_DIR) + + +def construct_contigency_table(background_genes, implicated_genes, module_genes): + not_in_implicated = background_genes.difference(implicated_genes) + not_in_module = background_genes.difference(module_genes) + + in_implicated_in_module = len(implicated_genes.intersection(module_genes)) + in_implicated_not_in_module = len( + implicated_genes.intersection(not_in_module)) + + not_in_implicated_in_module = len( + not_in_implicated.intersection(module_genes)) + not_in_implicated_not_in_module = len( + not_in_implicated.intersection(not_in_module)) + + table = [[in_implicated_in_module, not_in_implicated_in_module], + [in_implicated_not_in_module, not_in_implicated_not_in_module]] + + return table + + +# =============================================================================================== +# Utility functions for the display of the tables showing the results of the enrichment analysis +# =============================================================================================== + + +def convert_transcript_to_msu_id(transcript_ids_str, network): + """ + Converts given KEGG transcript IDs to their respective MSU accessions. + + Parameters: + - transcript_ids_str: KEGG transcript IDs + - network: Coexpression network + + Returns: + - Equivalent MSU accessions of the KEGG transcript IDs + """ + with open(f'{const.GENE_ID_MAPPING}/{network}/transcript-to-msu-id.pickle', 'rb') as f: + mapping_dict = pickle.load(f) + + output_str = '' + transcript_ids = transcript_ids_str.split('\n') + for transcript_id in transcript_ids: + for msu_id in mapping_dict[transcript_id]: + output_str += f'{msu_id}\n({transcript_id})\n\n' + + # Remove trailing newline characters + return output_str[:-2] + + +def get_genes_in_module(module_idx, network, algo, parameters): + with open(f'{const.NETWORK_MODULES}/{network}/transcript/{algo}/{parameters}/{algo}-module-list.tsv') as f: + for idx, module in enumerate(f): + if idx + 1 == int(module_idx): + return set(module.split('\t')) + + +def get_genes_in_pathway(pathway_id, network): + with open(f'{const.ENRICHMENT_ANALYSIS}/{network}/{const.KEGG_DOSA_GENESET}', 'rb') as f: + genes_in_pathway = pickle.load(f) + + return genes_in_pathway[pathway_id] + + +def get_genes_in_module_and_pathway(pathway_id, module_idx, network, algo, parameters): + return '\n'.join(list(get_genes_in_pathway(pathway_id, network).intersection( + get_genes_in_module(module_idx, network, algo, parameters)))) + + +def get_kegg_pathway_name(pathway_id, network): + with open(f'{const.ENRICHMENT_ANALYSIS}/{network}/{const.KEGG_DOSA_PATHWAY_NAMES}') as pathways: + for line in pathways: + line = line.split('\t') + if line[0].rstrip() == pathway_id: + return line[1].strip() + + +def remove_rap_db_info_in_pathway_name(pathway_name): + return pathway_name[:-len(' - Oryza sativa japonica (Japanese rice) (RAPDB)')] + +# ======================================================================================= +# Functions for the display of the tables showing the results of the enrichment analysis +# ======================================================================================= + + +def convert_to_df_go(result): + cols = ['ID', 'Gene Ontology Term', 'Gene Ratio', + 'BG Ratio', 'p-value', 'Adj. p-value', 'Genes'] + + if result.empty: + return create_empty_df_with_cols(cols) + + # Prettify display of genes + result['Genes'] = result['Genes'].str.split('/').str.join('\n') + + result['ID'] = get_go_link(result, 'ID') + + result = result.sort_values('Adj. p-value') + + display_cols_in_sci_notation( + result, [col for col in cols if 'p-value' in col]) + + return result[cols].dropna() + + +def convert_to_df_to(result): + cols = ['ID', 'Trait Ontology Term', 'Gene Ratio', + 'BG Ratio', 'p-value', 'Adj. p-value', 'Genes'] + + if result.empty: + return create_empty_df_with_cols(cols) + + # Prettify display of genes + result['Genes'] = result['Genes'].str.split('/').str.join('\n') + + result['ID'] = get_to_po_link(result, 'ID') + + result = result.sort_values('Adj. p-value') + + display_cols_in_sci_notation( + result, [col for col in cols if 'p-value' in col]) + + return result[cols].dropna() + + +def convert_to_df_po(result): + cols = ['ID', 'Plant Ontology Term', 'Gene Ratio', + 'BG Ratio', 'p-value', 'Adj. p-value', 'Genes'] + + if result.empty: + return create_empty_df_with_cols(cols) + + # Prettify display of genes + result['Genes'] = result['Genes'].str.split('/').str.join('\n') + + result['ID'] = get_to_po_link(result, 'ID') + + result = result.sort_values('Adj. p-value') + + display_cols_in_sci_notation( + result, [col for col in cols if 'p-value' in col]) + + return result[cols].dropna() + + +def convert_to_df_ora(result, network): + cols = ['ID', 'KEGG Pathway', 'Gene Ratio', + 'BG Ratio', 'p-value', 'Adj. p-value', 'Genes'] + + if result.empty: + return create_empty_df_with_cols(cols) + + result['KEGG Pathway'] = result['KEGG Pathway'].apply( + remove_rap_db_info_in_pathway_name) + + # Construct link before appending the MSU accession + result['ID'] = get_kegg_link(result, 'ID', 'Genes') + + # Prettify display of genes and convert to MSU accessions + result['Genes'] = result['Genes'].str.split( + '/').str.join('\n') + result['Genes'] = result.apply( + lambda x: convert_transcript_to_msu_id(x['Genes'], network), axis=1) + + result = result.sort_values('Adj. p-value') + + display_cols_in_sci_notation( + result, [col for col in cols if 'p-value' in col]) + + return result[cols].dropna() + + +def convert_to_df_pe(result, module_idx, network, algo, parameters): + cols = ['ID', 'KEGG Pathway', 'ORA p-value', 'Perturbation p-value', 'Combined p-value', + 'Adj. ORA p-value', 'Adj. Perturbation p-value', + 'Adj. Combined p-value', 'Genes'] + + if result.empty: + return create_empty_df_with_cols(cols) + + result = result.loc[result['Adj. Combined p-value'] < const.P_VALUE_CUTOFF] + + # IMPORTANT: Do not change ordering of instructions + + # Prettify display of ID + result['ID'] = result['ID'].str[len('path:'):] + + result['KEGG Pathway'] = result.apply( + lambda x: get_kegg_pathway_name(x['ID'], network), axis=1) + result['KEGG Pathway'] = result['KEGG Pathway'].apply( + remove_rap_db_info_in_pathway_name) + + result['Genes'] = result.apply(lambda x: get_genes_in_module_and_pathway( + x['ID'], module_idx, network, algo, parameters), axis=1) + + # Construct link before appending the MSU accession + result['ID'] = get_kegg_link(result, 'ID', 'Genes') + + result['Genes'] = result.apply( + lambda x: convert_transcript_to_msu_id(x['Genes'], network), axis=1) + + result = result.sort_values('Adj. Combined p-value') + + display_cols_in_sci_notation( + result, [col for col in cols if 'p-value' in col]) + + return result[cols].dropna() + + +def convert_to_df_spia(result, network): + cols = ['ID', 'KEGG Pathway', 'ORA p-value', 'Total Acc. Perturbation', 'Perturbation p-value', 'Combined p-value', + 'Adj. Combined p-value', 'Pathway Status', 'Genes'] + + if result.empty: + return create_empty_df_with_cols(cols) + + result = result.loc[result['Adj. Combined p-value'] < const.P_VALUE_CUTOFF] + + # Prettify display of ID + result['ID'] = 'dosa' + result['ID'] + result['Total Acc. Perturbation'] = result['tA'] + + # Prettify display of genes and convert to MSU accessions + result['Genes'] = result['View on KEGG'].apply( + get_genes_from_kegg_link) + + # Construct link before appending the MSU accession + result['ID'] = get_kegg_link(result, 'ID', 'Genes') + + result['Genes'] = result.apply( + lambda x: convert_transcript_to_msu_id(x['Genes'], network), axis=1) + + result = result.sort_values('Adj. Combined p-value') + + display_cols_in_sci_notation( + result, [col for col in cols if 'p-value' in col]) + + return result[cols].dropna() + + +def convert_to_df(active_tab, module_idx, network, algo, parameters): + """ + Returns the results of ontology and pathway enrichment analysis as a data frame + + Parameters: + - active_tab: ID of the tab corresponding to the selected enrichment analysis + - module_idx: Index of the selected module + - network: Coexpression network + - algo: Module detection algorithm + - parameters: Parameter at which module detection algorithm is run + + Returns: + - Data frame containing the results of ontology and pathway enrichment analysis + - True if the data frame is empty; False, otherwise + """ + dir = enrichment_tabs[get_tab_index(active_tab)].path + enrichment_type = dir.split('/')[-1] + + file = f'{const.ENRICHMENT_ANALYSIS}/{network}/output/{algo}/{parameters}/{dir}/results/{enrichment_type}-df-{module_idx}.tsv' + + columns = {'go': ['ID', 'Gene Ontology Term', 'Gene Ratio', + 'BG Ratio', 'p-value', 'Adj. p-value', 'q-value', 'Genes', 'Count'], + 'to': ['ID', 'Trait Ontology Term', 'Gene Ratio', + 'BG Ratio', 'p-value', 'Adj. p-value', 'q-value', 'Genes', 'Count'], + 'po': ['ID', 'Plant Ontology Term', 'Gene Ratio', + 'BG Ratio', 'p-value', 'Adj. p-value', 'q-value', 'Genes', 'Count'], + 'ora': ['ID', 'KEGG Pathway', 'Gene Ratio', + 'BG Ratio', 'p-value', 'Adj. p-value', 'q-value', 'Genes', 'Count'], + 'pe': ['ID', 'totalAcc', 'totalPert', 'totalAccNorm', 'totalPertNorm', + 'Perturbation p-value', 'pAcc', 'ORA p-value', 'Combined p-value', + 'Adj. Perturbation p-value', 'Adj. Accumulation p-value', + 'Adj. ORA p-value', 'Adj. Combined p-value'], + 'spia': ['KEGG Pathway', 'ID', 'pSize', 'NDE', 'ORA p-value', 'tA', + 'Perturbation p-value', 'Combined p-value', 'Adj. Combined p-value', + 'Adj. Combined p-value (Bonferroni)', 'Pathway Status', 'View on KEGG']} + + try: + result = pd.read_csv(file, delimiter='\t', + names=columns[enrichment_type], skiprows=1) + + # SPIA is a special case + if enrichment_type.lower() == 'spia': + # Add dtype argument to preserve leading 0 in KEGG pathway ID + result = pd.read_csv(file, delimiter='\t', + names=columns[enrichment_type], skiprows=1, dtype={'ID': object}) + + empty = result.empty + except: + result = pd.DataFrame() + empty = True + + # Return results data frame and whether it is empty + if enrichment_type == 'go': + return convert_to_df_go(result), empty + + elif enrichment_type == 'to': + return convert_to_df_to(result), empty + + elif enrichment_type == 'po': + return convert_to_df_po(result), empty + + elif enrichment_type == 'ora': + return convert_to_df_ora(result, network), empty + + elif enrichment_type == 'pe': + return convert_to_df_pe(result, module_idx, network, algo, parameters), empty + + elif enrichment_type == 'spia': + return convert_to_df_spia(result, network), empty + + +def convert_module_to_edge_list(module, network_file, output_dir, filename): + module = set(module) + selected_nodes = set() + with open(network_file) as network, open(f'{output_dir}/{filename}', 'w') as output: + for edge in network: + edge = edge.rstrip() + nodes = edge.split('\t') + + if nodes[0] in module and nodes[1] in module: + selected_nodes.add(nodes[0]) + selected_nodes.add(nodes[1]) + output.write(f'{nodes[0]}\t{nodes[1]}\n') + + assert len(selected_nodes - module) == 0 + + +def convert_modules_to_edgelist(network_file, module_file, module_index, output_dir): + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + with open(module_file) as modules: + for idx, module in enumerate(modules): + if idx == module_index - 1: + module = module.rstrip() + module = module.split('\t') + filename = f'module-{idx + 1}.tsv' + convert_module_to_edge_list( + module, network_file, output_dir, filename) + + break + + +def load_module_graph(implicated_gene_ids, module, network, algo, parameters, layout): + """ + Displays the subgraph induced by the module + + Parameters: + - implicated_gene_ids: Accessions of the genes implicated by GWAS + - module: Gene module + - network: Coexpression network + - algo: Module detection algorithm + - parameters: Parameter at which module detection algorithm is run + - layout: Layout of the graph display + + Returns: + - Elements (nodes and edges) of the graph + - Dictionary storing the layout of the graph + - Dictionary storing the visibility, width, and height of the graph + """ + try: + # Ignore the word "Module" at the start + module_idx = int(module.split(' ')[1]) + OUTPUT_DIR = f'{const.TEMP}/{network}/{algo}/modules/{parameters}' + coexpress_nw = f'{OUTPUT_DIR}/module-{module_idx}.tsv' + + if not path_exists(coexpress_nw): + NETWORK_FILE = f'{const.NETWORKS}/{network}.txt' + MODULE_FILE = f'{const.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv' + + convert_modules_to_edgelist( + NETWORK_FILE, MODULE_FILE, module_idx, OUTPUT_DIR) + + G = nx.read_edgelist(coexpress_nw, data=(('coexpress', float))) + + # Highlight the GWAS-implicated genes + elements = nx.cytoscape_data(G)['elements'] + for node in elements['nodes']: + if node['data']['id'] in implicated_gene_ids: + node['classes'] = 'shaded' + + return elements, {'name': layout}, {'visibility': 'visible', 'width': '100%', 'height': '100vh'} + + # Triggered when there are no enriched modules + except: + return {}, {'name': layout}, {'display': 'none', 'width': '100%', 'height': '100vh'} + +# ==================================== +# Functions for displaying statistics +# ==================================== + + +def count_modules(network, algo, parameters): + with open(f'{const.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv') as f: + return len(f.readlines()) + + +Noun = namedtuple('Noun', ['singular', 'plural']) + + +def get_noun_for_active_tab(active_tab): + tab_idx = get_tab_index(active_tab) + if 0 <= tab_idx and tab_idx <= 2: + return Noun('ontology term', 'ontology terms') + else: + return Noun('pathway', 'pathways') + + +def count_genes_in_module(implicated_genes, module_idx, network, algo, parameters): + with open(f'{const.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv') as modules: + for idx, module in enumerate(modules): + if idx == module_idx - 1: + module_genes = module.strip().split('\t') + return len(module_genes), len(set.intersection(set(module_genes), set(implicated_genes))) diff --git a/callbacks/constants.py b/callbacks/constants.py index ba7fd54d..1b559238 100644 --- a/callbacks/constants.py +++ b/callbacks/constants.py @@ -1,64 +1,64 @@ -class Constants(object): - LIFT_OVER = 'lift-over' - COEXPRESSION = 'co-expression' - TFBS = 'tf-enrichment' - IGV = 'browse-loci' - TEXT_MINING = 'text-mining' - - DATA = 'static' - APP_DATA = f'{DATA}/app_data' - RAW_DATA = f'{DATA}/raw_data' - - ANNOTATIONS = f'{APP_DATA}/annotations' - ALIGNMENTS = f'{APP_DATA}/alignments' - OGI_MAPPING = f'{APP_DATA}/ogi_mapping' - GENE_DESCRIPTIONS = f'{APP_DATA}/gene_descriptions' - GENE_ID_MAPPING = f'{APP_DATA}/gene_id_mapping' - TEXT_MINING = f'{APP_DATA}/text_mining' - QTARO = f'{APP_DATA}/qtaro' - - GENOMES_NIPPONBARE = f'{APP_DATA}/genomes/Nipponbare' - ANNOTATIONS_NB = f'{ANNOTATIONS}/Nb' - OPEN_CHROMATIN = f'{APP_DATA}/open_chromatin' - OPEN_CHROMATIN_PANICLE = f'{OPEN_CHROMATIN}/panicle' - QTARO_DICTIONARY = f'{QTARO}/qtaro.pickle' - - NETWORKS = f'{APP_DATA}/networks' - NETWORK_MODULES = f'{APP_DATA}/network_modules' - - TEMP = f'{DATA}/temp' - IMPLICATED_GENES = f'{TEMP}/implicated_genes' - TEMP_IGV = 'igv' - TEMP_COEXPRESSION = 'co_expression' - TEMP_TEXT_MINING = 'text_mining' - - TEMP_TFBS = 'tf_enrichment' - TFBS_BEDS = f'{APP_DATA}/tf_enrichment' - PROMOTER_BED = 'query_promoter_intervals' - GENOME_WIDE_BED = 'query_genomic_intervals' - TFBS_ANNOTATION = f'{TFBS_BEDS}/annotation' - - DATA_PREPARATION_SCRIPTS = 'prepare_data/workflow/scripts' - ENRICHMENT_ANALYSIS_SCRIPTS = f'{DATA_PREPARATION_SCRIPTS}/enrichment_analysis' - - ENRICHMENT_ANALYSIS = f'{APP_DATA}/enrichment_analysis' - ENRICHMENT_ANALYSIS_MAPPING = 'mapping' - ENRICHMENT_ANALYSIS_MODULES = 'modules' - - KEGG_DOSA_GENESET = f'{ENRICHMENT_ANALYSIS_MAPPING}/kegg-dosa-geneset.pickle' - KEGG_DOSA_PATHWAY_NAMES = f'{ENRICHMENT_ANALYSIS_MAPPING}/kegg-dosa-pathway-names.tsv' - - TEXT_MINING_ANNOTATED_ABSTRACTS = f'{TEXT_MINING}/annotated_abstracts.tsv' - TEXT_MINING_PUBMED = f'{TEXT_MINING}/pubmed_per_gene' - - P_VALUE_CUTOFF = 0.05 - - # ========= - # Database - # ========= - - FILE_STATUS_DB = f'{TEMP}/file_status.db' - FILE_STATUS_TABLE = 'file_status' - - def __init__(self): - pass +class Constants(object): + LIFT_OVER = 'lift-over' + COEXPRESSION = 'co-expression' + TFBS = 'tf-enrichment' + IGV = 'browse-loci' + TEXT_MINING = 'text-mining' + + DATA = 'static' + APP_DATA = f'{DATA}/app_data' + RAW_DATA = f'{DATA}/raw_data' + + ANNOTATIONS = f'{APP_DATA}/annotations' + ALIGNMENTS = f'{APP_DATA}/alignments' + OGI_MAPPING = f'{APP_DATA}/ogi_mapping' + GENE_DESCRIPTIONS = f'{APP_DATA}/gene_descriptions' + GENE_ID_MAPPING = f'{APP_DATA}/gene_id_mapping' + TEXT_MINING = f'{APP_DATA}/text_mining' + QTARO = f'{APP_DATA}/qtaro' + + GENOMES_NIPPONBARE = f'{APP_DATA}/genomes/Nipponbare' + ANNOTATIONS_NB = f'{ANNOTATIONS}/Nb' + OPEN_CHROMATIN = f'{APP_DATA}/open_chromatin' + OPEN_CHROMATIN_PANICLE = f'{OPEN_CHROMATIN}/panicle' + QTARO_DICTIONARY = f'{QTARO}/qtaro.pickle' + + NETWORKS = f'{APP_DATA}/networks' + NETWORK_MODULES = f'{APP_DATA}/network_modules' + + TEMP = f'{DATA}/temp' + IMPLICATED_GENES = f'{TEMP}/implicated_genes' + TEMP_IGV = 'igv' + TEMP_COEXPRESSION = 'co_expression' + TEMP_TEXT_MINING = 'text_mining' + + TEMP_TFBS = 'tf_enrichment' + TFBS_BEDS = f'{APP_DATA}/tf_enrichment' + PROMOTER_BED = 'query_promoter_intervals' + GENOME_WIDE_BED = 'query_genomic_intervals' + TFBS_ANNOTATION = f'{TFBS_BEDS}/annotation' + + DATA_PREPARATION_SCRIPTS = 'prepare_data/workflow/scripts' + ENRICHMENT_ANALYSIS_SCRIPTS = f'{DATA_PREPARATION_SCRIPTS}/enrichment_analysis' + + ENRICHMENT_ANALYSIS = f'{APP_DATA}/enrichment_analysis' + ENRICHMENT_ANALYSIS_MAPPING = 'mapping' + ENRICHMENT_ANALYSIS_MODULES = 'modules' + + KEGG_DOSA_GENESET = f'{ENRICHMENT_ANALYSIS_MAPPING}/kegg-dosa-geneset.pickle' + KEGG_DOSA_PATHWAY_NAMES = f'{ENRICHMENT_ANALYSIS_MAPPING}/kegg-dosa-pathway-names.tsv' + + TEXT_MINING_ANNOTATED_ABSTRACTS = f'{TEXT_MINING}/annotated_abstracts.tsv' + TEXT_MINING_PUBMED = f'{TEXT_MINING}/pubmed_per_gene' + + P_VALUE_CUTOFF = 0.05 + + # ========= + # Database + # ========= + + FILE_STATUS_DB = f'{TEMP}/file_status.db' + FILE_STATUS_TABLE = 'file_status' + + def __init__(self): + pass diff --git a/callbacks/file_util.py b/callbacks/file_util.py index 03834e14..00f76c0a 100644 --- a/callbacks/file_util.py +++ b/callbacks/file_util.py @@ -1,119 +1,119 @@ -import regex as re -import os -from .constants import Constants - -import time -import sqlite3 - -const = Constants() - - -def path_exists(path): - """ - Checks if given path exists - - Parameters: - - path: Path to be checked if it exists - - Returns: - - True if the path exists; False, otherwise - """ - return os.path.exists(path) - - -def make_dir(directory): - """ - Creates given directory if it does not yet exist - - Parameters: - - directory: Directory to be created - """ - if not path_exists(directory): - os.makedirs(directory) - - -def convert_text_to_path(text): - """ - Converts given text into a well-formed path - - Parameters: - - text: Text to be converted into a well-formed path - - Returns: - - Well-formed path - """ - return text.strip().replace( - ":", "_").replace(";", "__").replace("-", "_").replace('.', '_').replace(' ', '') - - -def get_path_to_temp(genomic_interval, analysis_type, *args): - """ - Forms the path to temporary (file-cached) results of given post-GWAS analysis - This function returns only the path name. It does not create the actual file or directory - - Parameters: - - genomic_interval: Genomic interval entered by the user - - analysis_type: Post-GWAS analysis - - args: Subfolder names appended to the path - - Returns: - - Path to temporary (file-cached) results of post-GWAS analysis - """ - genomic_interval_foldername = shorten_name(convert_text_to_path( - genomic_interval)) - - analysis_type = convert_text_to_path(analysis_type) - - temp_dir = f'{const.TEMP}/{genomic_interval_foldername}/{analysis_type}' - for folder in args: - temp_dir += f'/{convert_text_to_path(folder)}' - - temp_dir = re.sub(r'/+', '/', temp_dir) - - return temp_dir - - -def get_path_to_text_mining_temp(analysis_type, *args): - analysis_type = convert_text_to_path(analysis_type) - - temp_dir = f'{const.TEMP}/{analysis_type}' - for folder in args: - temp_dir += f'/{convert_text_to_path(folder)}' - - temp_dir = re.sub(r'/+', '/', temp_dir) - - return temp_dir - -def shorten_name(name): - try: - connection = sqlite3.connect(const.FILE_STATUS_DB) - cursor = connection.cursor() - - query = f'INSERT OR IGNORE INTO {const.FILE_STATUS_TABLE}(name) VALUES("{name}")' - - cursor.execute(query) - connection.commit() - - cursor.close() - connection.close() - except sqlite3.Error as error: - pass - - try: - connection = sqlite3.connect(const.FILE_STATUS_DB) - cursor = connection.cursor() - - query = f'SELECT rowid FROM {const.FILE_STATUS_TABLE} WHERE name = "{name}"' - cursor.execute(query) - row_id = cursor.fetchall()[0][0] - - cursor.close() - connection.close() - except sqlite3.Error as error: - pass - - return row_id - - -def append_timestamp_to_filename(filename): - return f'{filename}.{time.time_ns() // 1000}' +import regex as re +import os +from .constants import Constants + +import time +import sqlite3 + +const = Constants() + + +def path_exists(path): + """ + Checks if given path exists + + Parameters: + - path: Path to be checked if it exists + + Returns: + - True if the path exists; False, otherwise + """ + return os.path.exists(path) + + +def make_dir(directory): + """ + Creates given directory if it does not yet exist + + Parameters: + - directory: Directory to be created + """ + if not path_exists(directory): + os.makedirs(directory) + + +def convert_text_to_path(text): + """ + Converts given text into a well-formed path + + Parameters: + - text: Text to be converted into a well-formed path + + Returns: + - Well-formed path + """ + return text.strip().replace( + ":", "_").replace(";", "__").replace("-", "_").replace('.', '_').replace(' ', '') + + +def get_path_to_temp(genomic_interval, analysis_type, *args): + """ + Forms the path to temporary (file-cached) results of given post-GWAS analysis + This function returns only the path name. It does not create the actual file or directory + + Parameters: + - genomic_interval: Genomic interval entered by the user + - analysis_type: Post-GWAS analysis + - args: Subfolder names appended to the path + + Returns: + - Path to temporary (file-cached) results of post-GWAS analysis + """ + genomic_interval_foldername = shorten_name(convert_text_to_path( + genomic_interval)) + + analysis_type = convert_text_to_path(analysis_type) + + temp_dir = f'{const.TEMP}/{genomic_interval_foldername}/{analysis_type}' + for folder in args: + temp_dir += f'/{convert_text_to_path(folder)}' + + temp_dir = re.sub(r'/+', '/', temp_dir) + + return temp_dir + + +def get_path_to_text_mining_temp(analysis_type, *args): + analysis_type = convert_text_to_path(analysis_type) + + temp_dir = f'{const.TEMP}/{analysis_type}' + for folder in args: + temp_dir += f'/{convert_text_to_path(folder)}' + + temp_dir = re.sub(r'/+', '/', temp_dir) + + return temp_dir + +def shorten_name(name): + try: + connection = sqlite3.connect(const.FILE_STATUS_DB) + cursor = connection.cursor() + + query = f'INSERT OR IGNORE INTO {const.FILE_STATUS_TABLE}(name) VALUES("{name}")' + + cursor.execute(query) + connection.commit() + + cursor.close() + connection.close() + except sqlite3.Error as error: + pass + + try: + connection = sqlite3.connect(const.FILE_STATUS_DB) + cursor = connection.cursor() + + query = f'SELECT rowid FROM {const.FILE_STATUS_TABLE} WHERE name = "{name}"' + cursor.execute(query) + row_id = cursor.fetchall()[0][0] + + cursor.close() + connection.close() + except sqlite3.Error as error: + pass + + return row_id + + +def append_timestamp_to_filename(filename): + return f'{filename}.{time.time_ns() // 1000}' diff --git a/callbacks/general_util.py b/callbacks/general_util.py index c54b2fc0..c404741d 100644 --- a/callbacks/general_util.py +++ b/callbacks/general_util.py @@ -1,56 +1,56 @@ -import pandas as pd - -NULL_PLACEHOLDER = '–' - - -def display_in_sci_notation(number): - """ - Returns given number in scientific notation n * 10^m, where n is rounded to 6 decimal places - - Parameters: - - number: Number whose equivalent in scientific notation is to be returned - - Returns: - - Number in scientific notation - """ - return '{:.6e}'.format(number) - - -def display_in_fixed_dec_places(number): - return '{:.6f}'.format(float(number)) - - -def display_cols_in_sci_notation(result, numeric_columns): - for column in numeric_columns: - result[column] = result[column].apply(display_in_sci_notation) - - -def display_cols_in_fixed_dec_places(result, numeric_columns): - for column in numeric_columns: - result[column] = result[column].apply(display_in_fixed_dec_places) - - -def create_empty_df_with_cols(cols): - cols_dict = {} - for col in cols: - cols_dict[col] = [NULL_PLACEHOLDER] - - return pd.DataFrame(cols_dict) - - -def get_tab_index(tab_id): - return int(tab_id.split('-')[1]) - - -def get_num_unique_entries(table, column): - if table[column].iloc[0] == NULL_PLACEHOLDER: - return 0 - - return table[column].nunique() - - -def get_num_entries(table, column): - if table[column].iloc[0] == NULL_PLACEHOLDER: - return 0 - - return table[column].count() +import pandas as pd + +NULL_PLACEHOLDER = '–' + + +def display_in_sci_notation(number): + """ + Returns given number in scientific notation n * 10^m, where n is rounded to 6 decimal places + + Parameters: + - number: Number whose equivalent in scientific notation is to be returned + + Returns: + - Number in scientific notation + """ + return '{:.6e}'.format(number) + + +def display_in_fixed_dec_places(number): + return '{:.6f}'.format(float(number)) + + +def display_cols_in_sci_notation(result, numeric_columns): + for column in numeric_columns: + result[column] = result[column].apply(display_in_sci_notation) + + +def display_cols_in_fixed_dec_places(result, numeric_columns): + for column in numeric_columns: + result[column] = result[column].apply(display_in_fixed_dec_places) + + +def create_empty_df_with_cols(cols): + cols_dict = {} + for col in cols: + cols_dict[col] = [NULL_PLACEHOLDER] + + return pd.DataFrame(cols_dict) + + +def get_tab_index(tab_id): + return int(tab_id.split('-')[1]) + + +def get_num_unique_entries(table, column): + if table[column].iloc[0] == NULL_PLACEHOLDER: + return 0 + + return table[column].nunique() + + +def get_num_entries(table, column): + if table[column].iloc[0] == NULL_PLACEHOLDER: + return 0 + + return table[column].count() diff --git a/callbacks/homepage/callbacks.py b/callbacks/homepage/callbacks.py index 8ac2e9da..40d8d079 100644 --- a/callbacks/homepage/callbacks.py +++ b/callbacks/homepage/callbacks.py @@ -1,184 +1,184 @@ - -from dash import Input, Output, State, html, ctx, ALL -from dash.exceptions import PreventUpdate -from .util import * -from ..lift_over import util as lift_over_util -from ..browse_loci import util as browse_loci_util -from ..constants import Constants - -from ..style_util import * - -const = Constants() - - -def init_callback(app): - - @app.callback( - Output({'type': 'analysis-nav', 'label': ALL}, 'className'), - Output({'type': 'analysis-layout', 'label': ALL}, 'hidden'), - State({'type': 'analysis-nav', 'label': ALL}, 'className'), - State({'type': 'analysis-nav', 'label': ALL}, 'id'), - State({'type': 'analysis-layout', 'label': ALL}, 'id'), - Input('current-analysis-page-nav', 'data'), - Input('homepage-submit', 'n_clicks'), - State({'type': 'analysis-layout', 'label': ALL}, 'hidden'), - ) - def display_specific_analysis_page(nav_className, analysis_nav_id, analysis_layout_id, current_page, *_): - if current_page: - update_nav_class_name = [] - update_layout_hidden = [] - - for i in range(len(analysis_nav_id)): - if analysis_nav_id[i]['label'] == current_page: - nav_classes = add_class_name('active', nav_className[i]) - else: - nav_classes = remove_class_name('active', nav_className[i]) - - update_nav_class_name.append(nav_classes) - - for i in range(len(analysis_layout_id)): - if analysis_layout_id[i]['label'] == current_page: - hide_layout = False - else: - hide_layout = True - - update_layout_hidden.append(hide_layout) - - return update_nav_class_name, update_layout_hidden - - raise PreventUpdate - - @app.callback( - Output('session-container', 'children'), - Output('input-error', 'children'), - Output('input-error', 'style'), - Output('homepage-is-submitted', 'data'), - Output('homepage-genomic-intervals-submitted-input', 'data'), - - State('homepage-genomic-intervals', 'value'), - - Input('homepage-submit', 'n_clicks'), - Input('homepage-genomic-intervals', 'n_submit'), - State('session-container', 'children'), - - Input('homepage-reset', 'n_clicks'), - Input('homepage-clear-cache', 'n_clicks'), - - prevent_initial_call=True - ) - def parse_input(nb_intervals_str, n_clicks, n_submit, dccStore_children, *_): - if 'homepage-clear-cache' == ctx.triggered_id: - clear_cache_folder() - - if 'homepage-reset' == ctx.triggered_id: - # clear data for items in dcc.Store found in session-container - dccStore_children = get_cleared_dccStore_data_excluding_some_data(dccStore_children) - - return dccStore_children, None, {'display': 'none'}, False, '' - - if n_submit >= 1 or ('homepage-submit' == ctx.triggered_id and n_clicks >= 1): - if nb_intervals_str: - intervals = lift_over_util.get_genomic_intervals_from_input( - nb_intervals_str) - - if lift_over_util.is_error(intervals): - return dccStore_children, [f'Error encountered while parsing genomic interval {intervals[1]}', html.Br(), lift_over_util.get_error_message(intervals[0])], \ - {'display': 'block'}, False, nb_intervals_str - else: - # clear data for items in dcc.Store found in session-container - dccStore_children = get_cleared_dccStore_data_excluding_some_data( - dccStore_children, 'homepage-genomic-intervals-saved-input') - - browse_loci_util.write_igv_tracks_to_file(nb_intervals_str) - - return dccStore_children, None, {'display': 'none'}, True, nb_intervals_str - else: - return dccStore_children, [f'Error: Input for genomic interval should not be empty.'], \ - {'display': 'block'}, False, nb_intervals_str - - raise PreventUpdate - - @app.callback( - Output('lift-over-nb-table', 'data'), - Output('lift-over-nb-entire-table', 'data'), - Input('homepage-genomic-intervals-submitted-input', 'data'), - State('homepage-is-submitted', 'data') - ) - def get_nipponbare_gene_ids(nb_intervals_str, homepage_is_submitted): - if homepage_is_submitted: - if nb_intervals_str: - nb_intervals = lift_over_util.get_genomic_intervals_from_input( - nb_intervals_str) - - if not lift_over_util.is_error(nb_intervals): - genes_from_Nb = lift_over_util.get_genes_in_Nb( - nb_intervals) - - return genes_from_Nb[1], genes_from_Nb[0].to_dict('records') - - raise PreventUpdate - - @app.callback( - Output('homepage-genomic-intervals-saved-input', - 'data', allow_duplicate=True), - Input({'type': 'example-genomic-interval', - 'description': ALL}, 'n_clicks'), - prevent_initial_call=True - ) - def set_input_fields_with_preset_input(example_genomic_interval_n_clicks): - if ctx.triggered_id and not all(val == 0 for val in example_genomic_interval_n_clicks): - return get_example_genomic_interval(ctx.triggered_id['description']) - - raise PreventUpdate - - - @app.callback( - Output('homepage-genomic-intervals-saved-input', - 'data', allow_duplicate=True), - Input('homepage-genomic-intervals', 'value'), - prevent_initial_call=True - ) - def set_input_fields(genomic_intervals): - return genomic_intervals - - - @app.callback( - Output('homepage-results-container', 'style'), - Input('homepage-is-submitted', 'data'), - Input('homepage-submit', 'n_clicks'), - ) - def display_homepage_output(homepage_is_submitted, *_): - if homepage_is_submitted: - return {'display': 'block'} - - else: - return {'display': 'none'} - - @app.callback( - Output('current-analysis-page-nav', 'data'), - Input({'type': 'analysis-nav', 'label': ALL}, 'n_clicks') - ) - def set_input_homepage_session_state(analysis_nav_items_n_clicks): - if ctx.triggered_id: - if not all(val == 0 for val in analysis_nav_items_n_clicks): - analysis_page_id = ctx.triggered_id.label - return analysis_page_id - - raise PreventUpdate - - @app.callback( - Output('homepage-genomic-intervals', 'value'), - Input('homepage-genomic-intervals-saved-input', 'data'), - ) - def get_input_homepage_session_state(genomic_intervals): - return genomic_intervals - - @app.callback( - Output('genomic-interval-modal', 'is_open'), - Input('genomic-interval-tooltip', 'n_clicks') - ) - def open_modals(tooltip_n_clicks): - if tooltip_n_clicks > 0: - return True - - raise PreventUpdate + +from dash import Input, Output, State, html, ctx, ALL +from dash.exceptions import PreventUpdate +from .util import * +from ..lift_over import util as lift_over_util +from ..browse_loci import util as browse_loci_util +from ..constants import Constants + +from ..style_util import * + +const = Constants() + + +def init_callback(app): + + @app.callback( + Output({'type': 'analysis-nav', 'label': ALL}, 'className'), + Output({'type': 'analysis-layout', 'label': ALL}, 'hidden'), + State({'type': 'analysis-nav', 'label': ALL}, 'className'), + State({'type': 'analysis-nav', 'label': ALL}, 'id'), + State({'type': 'analysis-layout', 'label': ALL}, 'id'), + Input('current-analysis-page-nav', 'data'), + Input('homepage-submit', 'n_clicks'), + State({'type': 'analysis-layout', 'label': ALL}, 'hidden'), + ) + def display_specific_analysis_page(nav_className, analysis_nav_id, analysis_layout_id, current_page, *_): + if current_page: + update_nav_class_name = [] + update_layout_hidden = [] + + for i in range(len(analysis_nav_id)): + if analysis_nav_id[i]['label'] == current_page: + nav_classes = add_class_name('active', nav_className[i]) + else: + nav_classes = remove_class_name('active', nav_className[i]) + + update_nav_class_name.append(nav_classes) + + for i in range(len(analysis_layout_id)): + if analysis_layout_id[i]['label'] == current_page: + hide_layout = False + else: + hide_layout = True + + update_layout_hidden.append(hide_layout) + + return update_nav_class_name, update_layout_hidden + + raise PreventUpdate + + @app.callback( + Output('session-container', 'children'), + Output('input-error', 'children'), + Output('input-error', 'style'), + Output('homepage-is-submitted', 'data'), + Output('homepage-genomic-intervals-submitted-input', 'data'), + + State('homepage-genomic-intervals', 'value'), + + Input('homepage-submit', 'n_clicks'), + Input('homepage-genomic-intervals', 'n_submit'), + State('session-container', 'children'), + + Input('homepage-reset', 'n_clicks'), + Input('homepage-clear-cache', 'n_clicks'), + + prevent_initial_call=True + ) + def parse_input(nb_intervals_str, n_clicks, n_submit, dccStore_children, *_): + if 'homepage-clear-cache' == ctx.triggered_id: + clear_cache_folder() + + if 'homepage-reset' == ctx.triggered_id: + # clear data for items in dcc.Store found in session-container + dccStore_children = get_cleared_dccStore_data_excluding_some_data(dccStore_children) + + return dccStore_children, None, {'display': 'none'}, False, '' + + if n_submit >= 1 or ('homepage-submit' == ctx.triggered_id and n_clicks >= 1): + if nb_intervals_str: + intervals = lift_over_util.get_genomic_intervals_from_input( + nb_intervals_str) + + if lift_over_util.is_error(intervals): + return dccStore_children, [f'Error encountered while parsing genomic interval {intervals[1]}', html.Br(), lift_over_util.get_error_message(intervals[0])], \ + {'display': 'block'}, False, nb_intervals_str + else: + # clear data for items in dcc.Store found in session-container + dccStore_children = get_cleared_dccStore_data_excluding_some_data( + dccStore_children, 'homepage-genomic-intervals-saved-input') + + browse_loci_util.write_igv_tracks_to_file(nb_intervals_str) + + return dccStore_children, None, {'display': 'none'}, True, nb_intervals_str + else: + return dccStore_children, [f'Error: Input for genomic interval should not be empty.'], \ + {'display': 'block'}, False, nb_intervals_str + + raise PreventUpdate + + @app.callback( + Output('lift-over-nb-table', 'data'), + Output('lift-over-nb-entire-table', 'data'), + Input('homepage-genomic-intervals-submitted-input', 'data'), + State('homepage-is-submitted', 'data') + ) + def get_nipponbare_gene_ids(nb_intervals_str, homepage_is_submitted): + if homepage_is_submitted: + if nb_intervals_str: + nb_intervals = lift_over_util.get_genomic_intervals_from_input( + nb_intervals_str) + + if not lift_over_util.is_error(nb_intervals): + genes_from_Nb = lift_over_util.get_genes_in_Nb( + nb_intervals) + + return genes_from_Nb[1], genes_from_Nb[0].to_dict('records') + + raise PreventUpdate + + @app.callback( + Output('homepage-genomic-intervals-saved-input', + 'data', allow_duplicate=True), + Input({'type': 'example-genomic-interval', + 'description': ALL}, 'n_clicks'), + prevent_initial_call=True + ) + def set_input_fields_with_preset_input(example_genomic_interval_n_clicks): + if ctx.triggered_id and not all(val == 0 for val in example_genomic_interval_n_clicks): + return get_example_genomic_interval(ctx.triggered_id['description']) + + raise PreventUpdate + + + @app.callback( + Output('homepage-genomic-intervals-saved-input', + 'data', allow_duplicate=True), + Input('homepage-genomic-intervals', 'value'), + prevent_initial_call=True + ) + def set_input_fields(genomic_intervals): + return genomic_intervals + + + @app.callback( + Output('homepage-results-container', 'style'), + Input('homepage-is-submitted', 'data'), + Input('homepage-submit', 'n_clicks'), + ) + def display_homepage_output(homepage_is_submitted, *_): + if homepage_is_submitted: + return {'display': 'block'} + + else: + return {'display': 'none'} + + @app.callback( + Output('current-analysis-page-nav', 'data'), + Input({'type': 'analysis-nav', 'label': ALL}, 'n_clicks') + ) + def set_input_homepage_session_state(analysis_nav_items_n_clicks): + if ctx.triggered_id: + if not all(val == 0 for val in analysis_nav_items_n_clicks): + analysis_page_id = ctx.triggered_id.label + return analysis_page_id + + raise PreventUpdate + + @app.callback( + Output('homepage-genomic-intervals', 'value'), + Input('homepage-genomic-intervals-saved-input', 'data'), + ) + def get_input_homepage_session_state(genomic_intervals): + return genomic_intervals + + @app.callback( + Output('genomic-interval-modal', 'is_open'), + Input('genomic-interval-tooltip', 'n_clicks') + ) + def open_modals(tooltip_n_clicks): + if tooltip_n_clicks > 0: + return True + + raise PreventUpdate diff --git a/callbacks/homepage/util.py b/callbacks/homepage/util.py index eceac5b0..6fa3818e 100644 --- a/callbacks/homepage/util.py +++ b/callbacks/homepage/util.py @@ -1,77 +1,86 @@ -import os -import shutil -from ..style_util import * -from ..constants import Constants -from ..file_util import * - -import sqlite3 - -const = Constants() - -example_genomic_intervals = { - 'pre-harvest': 'Chr01:1523625-1770814;Chr04:4662701-4670717', - 'anaerobic-germination': 'Chr07:6000000-6900000'} - - -def clear_cache_folder(): - if os.path.exists(const.TEMP): - shutil.rmtree(const.TEMP, ignore_errors=True) - - # Drop the table - try: - connection = sqlite3.connect(const.FILE_STATUS_DB) - cursor = connection.cursor() - - query = f'DROP TABLE {const.FILE_STATUS_TABLE}' - - cursor.execute(query) - connection.commit() - - cursor.close() - connection.close() - except: - pass - - # Recreate the database - make_dir(const.TEMP) - - try: - connection = sqlite3.connect(const.FILE_STATUS_DB) - cursor = connection.cursor() - - query = f'CREATE TABLE IF NOT EXISTS {const.FILE_STATUS_TABLE} (name TEXT, UNIQUE(name));' - - cursor.execute(query) - connection.commit() - - cursor.close() - connection.close() - except sqlite3.Error as error: - pass - - -def get_cleared_dccStore_data_excluding_some_data(dccStore_children, *arg): - for i in range(len(dccStore_children)): - dccStore_ID = dccStore_children[i]['props']['id'] - - if not dccStore_ID in arg: - dccStore_children[i]['props']['data'] = '' - - return dccStore_children - - -def get_example_genomic_interval(description): - return example_genomic_intervals[description] - - -def set_active_class(display_map, active_class): - class_names = [] - for page, layout_link in display_map.items(): - if page == active_class: - class_name = add_class_name('active', layout_link.link_class) - else: - class_name = remove_class_name('active', layout_link.link_class) - - class_names.append(class_name) - - return tuple(class_names) +import os +import shutil +from ..style_util import * +from ..constants import Constants +from ..file_util import * + +import sqlite3 + +const = Constants() + +example_genomic_intervals = { + 'pre-harvest': 'Chr01:1523625-1770814;Chr04:4662701-4670717', + 'anaerobic-germination': 'Chr07:6000000-6900000'} + + +def clear_cache_folder(): + if os.path.exists(const.TEMP): + shutil.rmtree(const.TEMP, ignore_errors=True) + + # Drop the table + try: + connection = sqlite3.connect(const.FILE_STATUS_DB) + cursor = connection.cursor() + + query = f'DROP TABLE {const.FILE_STATUS_TABLE}' + + cursor.execute(query) + connection.commit() + + cursor.close() + connection.close() + except: + pass + + # Recreate the database + make_dir(const.TEMP) + + try: + connection = sqlite3.connect(const.FILE_STATUS_DB) + cursor = connection.cursor() + + query = f'CREATE TABLE IF NOT EXISTS {const.FILE_STATUS_TABLE} (name TEXT, UNIQUE(name));' + + cursor.execute(query) + connection.commit() + + cursor.close() + connection.close() + except sqlite3.Error as error: + pass + + +def get_cleared_dccStore_data_excluding_some_data(dccStore_children, *args): + for i in range(len(dccStore_children)): + dccStore_ID = dccStore_children[i]['props']['id'] + + if args: + flag = False + for arg in args: + if arg in dccStore_ID: + flag = True + + if not flag: + dccStore_children[i]['props']['data'] = '' + + else: + dccStore_children[i]['props']['data'] = '' + + return dccStore_children + + +def get_example_genomic_interval(description): + return example_genomic_intervals[description] + + +def set_active_class(display_map, active_class): + class_names = [] + for page, layout_link in display_map.items(): + if page == active_class: + class_name = add_class_name('active', layout_link.link_class) + else: + class_name = remove_class_name('active', layout_link.link_class) + + class_names.append(class_name) + + return tuple(class_names) diff --git a/callbacks/lift_over/callbacks.py b/callbacks/lift_over/callbacks.py index dad5627a..354ad730 100644 --- a/callbacks/lift_over/callbacks.py +++ b/callbacks/lift_over/callbacks.py @@ -1,425 +1,425 @@ -from dash import Input, Output, State, dcc, html -from dash.exceptions import PreventUpdate - -from .util import * -from ..constants import Constants -from ..general_util import * -const = Constants() - - -def init_callback(app): - @app.callback( - Output('lift-over-genomic-intervals-input', 'children'), - State('homepage-genomic-intervals-submitted-input', 'data'), - Input('homepage-is-submitted', 'data'), - Input('lift-over-submit', 'n_clicks') - ) - def display_input(nb_intervals_str, homepage_is_submitted, *_): - if homepage_is_submitted: - if nb_intervals_str and not is_error( - get_genomic_intervals_from_input(nb_intervals_str)): - return [html.B('Your Input Intervals: '), html.Span(nb_intervals_str)] - else: - return None - - raise PreventUpdate - - @app.callback( - Output('lift-over-is-submitted', 'data', allow_duplicate=True), - Output('lift-over-other-refs-submitted-input', - 'data', allow_duplicate=True), - Output('lift-over-active-tab', 'data', allow_duplicate=True), - Output('lift-over-active-filter', 'data', allow_duplicate=True), - Input('lift-over-submit', 'n_clicks'), - State('homepage-is-submitted', 'data'), - State('lift-over-other-refs', 'value'), - prevent_initial_call=True - ) - def submit_lift_over_input(lift_over_submit_n_clicks, homepage_is_submitted, other_refs): - if homepage_is_submitted and lift_over_submit_n_clicks >= 1: - other_refs = sanitize_other_refs(other_refs) - - return True, other_refs, None, None - - raise PreventUpdate - - @app.callback( - Output('lift-over-results-container', 'style'), - Input('lift-over-is-submitted', 'data'), - ) - def display_lift_over_output(lift_over_is_submitted): - if lift_over_is_submitted: - return {'display': 'block'} - - else: - return {'display': 'none'} - - @app.callback( - Output('lift-over-results-intro', 'children'), - Output('lift-over-results-tabs', 'children'), - - Output('lift-over-overlap-table-filter', 'options'), - Output('lift-over-overlap-table-filter', 'value'), - - State('homepage-genomic-intervals-submitted-input', 'data'), - Input('lift-over-other-refs-submitted-input', 'data'), - - State('homepage-is-submitted', 'data'), - - State('lift-over-active-filter', 'data'), - State('lift-over-is-submitted', 'data') - ) - def display_gene_tabs(nb_intervals_str, other_refs, homepage_is_submitted, active_filter, lift_over_is_submitted): - if homepage_is_submitted and lift_over_is_submitted: - if nb_intervals_str and not is_error(get_genomic_intervals_from_input(nb_intervals_str)): - tabs = get_tabs() - - other_refs = sanitize_other_refs(other_refs) - - if other_refs: - tabs = tabs + other_refs - - tabs_children = [dcc.Tab(label=tab, value=tab) if idx < len(get_tabs()) - else dcc.Tab(label=f'Unique to {tab}', value=tab) - for idx, tab in enumerate(tabs)] - - if not active_filter: - active_filter = tabs[len(get_tabs()) - 1:] - - gene_list_msg = [html.Span( - 'The tabs below show the implicated genes in '), html.B('Nipponbare')] - - if other_refs: - other_refs_str = other_refs[0] - if len(other_refs) == 2: - other_refs_str += f' and {other_refs[1]}' - elif len(other_refs) > 2: - for idx, other_ref in enumerate(other_refs[1:]): - if idx != len(other_refs) - 2: - other_refs_str += f', ' - else: - other_refs_str += f', and ' - - other_refs_str += f'{other_ref} ({other_ref_genomes[other_ref]})' - - gene_list_msg += [html.Span(' and in orthologous regions of '), - html.B(other_refs_str)] - - gene_list_msg += [html.Span(':')] - - return gene_list_msg, tabs_children, tabs[len(get_tabs()) - 1:], active_filter - else: - return None, None, [], None - - raise PreventUpdate - - @app.callback( - Output('lift-over-results-tabs', 'active_tab'), - State('homepage-is-submitted', 'data'), - State('lift-over-active-tab', 'data'), - State('lift-over-is-submitted', 'data'), - Input('lift-over-other-refs-submitted-input', 'data') - ) - def display_active_tab(homepage_is_submitted, saved_active_tab, lift_over_is_submitted, *_): - if homepage_is_submitted and lift_over_is_submitted: - if not saved_active_tab: - return 'tab-0' - - return saved_active_tab - - raise PreventUpdate - - @app.callback( - Output('lift-over-other-refs-saved-input', - 'data', allow_duplicate=True), - Input('lift-over-other-refs', 'value'), - State('homepage-is-submitted', 'data'), - prevent_initial_call=True - ) - def set_input_lift_over_session_state(other_refs, homepage_is_submitted): - if homepage_is_submitted: - return other_refs - - raise PreventUpdate - - @app.callback( - Output('lift-over-active-tab', 'data', allow_duplicate=True), - Output('lift-over-active-filter', 'data', allow_duplicate=True), - - Input('lift-over-results-tabs', 'active_tab'), - Input('lift-over-overlap-table-filter', 'value'), - - State('homepage-is-submitted', 'data'), - State('lift-over-is-submitted', 'data'), - prevent_initial_call=True, - ) - def get_submitted_lift_over_session_state(active_tab, filter_rice_variants, homepage_is_submitted, lift_over_is_submitted): - if homepage_is_submitted and lift_over_is_submitted: - return active_tab, filter_rice_variants - - raise PreventUpdate - - @app.callback( - Output('lift-over-other-refs', 'value'), - State('lift-over-other-refs', 'multi'), - State('homepage-is-submitted', 'data'), - State('lift-over-other-refs-saved-input', 'data'), - Input('homepage-genomic-intervals-submitted-input', 'data'), - Input('lift-over-submit', 'n_clicks') - ) - def get_input_lift_over_session_state(is_multi_other_refs, homepage_is_submitted, other_refs, *_): - if homepage_is_submitted: - if not is_multi_other_refs and other_refs: - other_refs = other_refs[0] - - return other_refs - - raise PreventUpdate - - @app.callback( - Output('lift-over-results-gene-intro', 'children'), - Output('lift-over-overlap-table-filter', 'style'), - - Input('lift-over-results-tabs', 'active_tab'), - State('lift-over-results-tabs', 'children'), - State('homepage-is-submitted', 'data'), - State('lift-over-is-submitted', 'data') - ) - def display_gene_intro(active_tab, children, homepage_is_submitted, lift_over_is_submitted): - if homepage_is_submitted and lift_over_is_submitted: - if active_tab == get_tab_id('All Genes'): - return 'The table below lists all the implicated genes.', {'display': 'none'} - - elif active_tab == get_tab_id('Common Genes'): - return 'The table below lists the implicated genes that are common to:', {'display': 'block'} - - elif active_tab == get_tab_id('Nipponbare'): - return 'The table below lists the genes overlapping the site in the Nipponbare reference.', {'display': 'none'} - - else: - tab_number = get_tab_index(active_tab) - other_ref = children[tab_number]['props']['value'] - - return f'The table below lists the genes from homologous regions in {other_ref} that are not in Nipponbare.', {'display': 'none'} - - raise PreventUpdate - - @app.callback( - Output('lift-over-results-statistics', 'children'), - Output('lift-over-results-tabs', 'className'), - - Input('homepage-genomic-intervals-submitted-input', 'data'), - Input('lift-over-other-refs-submitted-input', 'data'), - - State('homepage-is-submitted', 'data'), - State('lift-over-is-submitted', 'data') - ) - def display_gene_statistics(nb_intervals_str, other_refs, homepage_is_submitted, lift_over_is_submitted): - if homepage_is_submitted and lift_over_is_submitted: - nb_intervals = get_genomic_intervals_from_input( - nb_intervals_str) - - genes_from_Nb_raw = get_genes_in_Nb(nb_intervals)[0] - - num_unique_genes = get_num_unique_entries( - genes_from_Nb_raw, 'OGI') - if num_unique_genes == 1: - gene_statistics_nb = f'{num_unique_genes} gene was found in Nipponbare' - else: - gene_statistics_nb = f'{num_unique_genes} genes were found in Nipponbare' - - for idx, other_ref in enumerate(other_refs): - common_genes_raw = get_common_genes([other_ref], nb_intervals) - num_unique_genes = get_num_unique_entries( - common_genes_raw, 'OGI') - if idx == len(other_refs) - 1: - if num_unique_genes == 1: - gene_statistics_nb += f', and {num_unique_genes} gene in {other_ref}' - else: - gene_statistics_nb += f', and {num_unique_genes} genes in {other_ref}' - else: - if num_unique_genes == 1: - gene_statistics_nb += f', {num_unique_genes} gene in {other_ref}' - else: - gene_statistics_nb += f', {num_unique_genes} genes in {other_ref}' - - gene_statistics_nb += '. ' - gene_statistics_items = [html.Li(gene_statistics_nb)] - - if other_refs: - other_refs.append('Nipponbare') - genes_common = get_common_genes(other_refs, nb_intervals) - num_unique_genes = get_num_unique_entries(genes_common, 'OGI') - - if num_unique_genes == 1: - gene_statistics_common = f'Among these, {num_unique_genes} gene is common to all cultivars.' - else: - gene_statistics_common = f'Among these, {num_unique_genes} genes are common to all cultivars.' - - gene_statistics_items.append( - html.Li(gene_statistics_common)) - - gene_statistics_other_ref = f'' - other_refs.pop() # Remove added Nipponbare - for idx, other_ref in enumerate(other_refs): - genes_from_other_ref_raw = get_unique_genes_in_other_ref( - other_ref, nb_intervals) - - if len(other_refs) > 1 and idx == len(other_refs) - 1: - gene_statistics_other_ref += f', and ' - elif idx != 0: - gene_statistics_other_ref += f', ' - - num_unique_genes = get_num_unique_entries( - genes_from_other_ref_raw, 'OGI') - - if num_unique_genes == 1: - gene_statistics_other_ref += f'{num_unique_genes} gene is unique to {other_ref}' - else: - gene_statistics_other_ref += f'{num_unique_genes} genes are unique to {other_ref}' - - gene_statistics_other_ref += '.' - gene_statistics_items.append( - html.Li(gene_statistics_other_ref)) - - # Setting the class name of lift-over-results-tabs to None is for removing the top margin during loading - return gene_statistics_items, None - - raise PreventUpdate - - @app.callback( - Output('lift-over-results-table', 'columns'), - Output('lift-over-results-table', 'data'), - - Input('homepage-genomic-intervals-submitted-input', 'data'), - Input('lift-over-results-tabs', 'active_tab'), - Input('lift-over-overlap-table-filter', 'value'), - Input('lift-over-other-refs-submitted-input', 'data'), - - State('lift-over-results-tabs', 'children'), - State('homepage-is-submitted', 'data'), - State('lift-over-is-submitted', 'data') - ) - def display_gene_tables(nb_intervals_str, active_tab, filter_rice_variants, other_refs, children, homepage_is_submitted, lift_over_is_submitted): - if homepage_is_submitted and lift_over_is_submitted: - nb_intervals = get_genomic_intervals_from_input( - nb_intervals_str) - - if active_tab == get_tab_id('All Genes'): - all_genes_raw = get_all_genes(other_refs, nb_intervals) - - mask = (all_genes_raw['OGI'] != NULL_PLACEHOLDER) - all_genes_raw.loc[mask, 'OGI'] = get_rgi_orthogroup_link( - all_genes_raw, 'OGI') - if 'Nipponbare' in all_genes_raw.columns: - mask = (all_genes_raw['Nipponbare'] != NULL_PLACEHOLDER) - all_genes_raw.loc[mask, 'Nipponbare'] = get_rgi_genecard_link( - all_genes_raw, 'Nipponbare') - - for cultivar in other_ref_genomes: - if cultivar in all_genes_raw.columns: - mask = (all_genes_raw[cultivar] != NULL_PLACEHOLDER) - all_genes_raw.loc[mask, cultivar] = get_rgi_genecard_link( - all_genes_raw, cultivar) - - all_genes = all_genes_raw.to_dict('records') - - columns = [{'id': x, 'name': x, 'presentation': 'markdown'} - for x in all_genes_raw.columns] - - return columns, all_genes - - elif active_tab == get_tab_id('Common Genes'): - common_genes_raw = get_common_genes( - filter_rice_variants, nb_intervals) - - # Mask will be triggered if no cultivar is selected - mask = (common_genes_raw['OGI'] != NULL_PLACEHOLDER) - common_genes_raw.loc[mask, 'OGI'] = get_rgi_orthogroup_link( - common_genes_raw, 'OGI') - - if 'Nipponbare' in common_genes_raw.columns: - mask = (common_genes_raw['Nipponbare'] != NULL_PLACEHOLDER) - common_genes_raw.loc[mask, 'Nipponbare'] = get_rgi_genecard_link( - common_genes_raw, 'Nipponbare') - - for cultivar in other_ref_genomes: - if cultivar in common_genes_raw.columns: - mask = (common_genes_raw[cultivar] != NULL_PLACEHOLDER) - common_genes_raw.loc[mask, cultivar] = get_rgi_genecard_link( - common_genes_raw, cultivar) - - common_genes = common_genes_raw.to_dict('records') - - columns = [{'id': x, 'name': x, 'presentation': 'markdown'} - for x in common_genes_raw.columns] - - return columns, common_genes - - elif active_tab == get_tab_id('Nipponbare'): - genes_from_Nb_raw = get_genes_in_Nb( - nb_intervals)[0].drop( - ['Chromosome', 'Start', 'End', 'Strand'], axis=1) - - mask = (genes_from_Nb_raw['OGI'] != NULL_PLACEHOLDER) - genes_from_Nb_raw.loc[mask, 'OGI'] = get_rgi_orthogroup_link( - genes_from_Nb_raw, 'OGI') - - mask = (genes_from_Nb_raw['Name'] != NULL_PLACEHOLDER) - genes_from_Nb_raw.loc[mask, 'Name'] = get_rgi_genecard_link( - genes_from_Nb_raw, 'Name') - - genes_from_Nb = genes_from_Nb_raw.to_dict('records') - - columns = [{'id': x, 'name': x, 'presentation': 'markdown'} - for x in genes_from_Nb_raw.columns] - - return columns, genes_from_Nb - - else: - tab_number = get_tab_index(active_tab) - other_ref = children[tab_number]['props']['value'] - - genes_from_other_ref_raw = get_unique_genes_in_other_ref( - other_ref, nb_intervals) - - mask = (genes_from_other_ref_raw['OGI'] != NULL_PLACEHOLDER) - genes_from_other_ref_raw.loc[mask, 'OGI'] = get_rgi_orthogroup_link( - genes_from_other_ref_raw, 'OGI') - - mask = (genes_from_other_ref_raw['Name'] != NULL_PLACEHOLDER) - genes_from_other_ref_raw.loc[mask, 'Name'] = get_rgi_genecard_link( - genes_from_other_ref_raw, 'Name') - - genes_from_other_ref = genes_from_other_ref_raw.to_dict( - 'records') - - columns = [{'id': x, 'name': x, 'presentation': 'markdown'} - for x in genes_from_other_ref_raw.columns] - - return columns, genes_from_other_ref - - raise PreventUpdate - - @app.callback( - Output('lift-over-results-table', 'filter_query'), - - Input('lift-over-reset-table', 'n_clicks'), - Input('lift-over-results-tabs', 'active_tab'), - Input('lift-over-overlap-table-filter', 'value') - ) - def reset_table_filters(*_): - return '' - - @app.callback( - Output('lift-over-download-df-to-csv', 'data'), - Input('lift-over-export-table', 'n_clicks'), - State('lift-over-results-table', 'data'), - State('homepage-genomic-intervals-submitted-input', 'data') - ) - def download_lift_over_table_to_csv(download_n_clicks, lift_over_df, genomic_intervals): - if download_n_clicks >= 1: - df = pd.DataFrame(lift_over_df) - return dcc.send_data_frame(df.to_csv, f'[{genomic_intervals}] Gene List and Lift-Over.csv', index=False) - - raise PreventUpdate +from dash import Input, Output, State, dcc, html +from dash.exceptions import PreventUpdate + +from .util import * +from ..constants import Constants +from ..general_util import * +const = Constants() + + +def init_callback(app): + @app.callback( + Output('lift-over-genomic-intervals-input', 'children'), + State('homepage-genomic-intervals-submitted-input', 'data'), + Input('homepage-is-submitted', 'data'), + Input('lift-over-submit', 'n_clicks') + ) + def display_input(nb_intervals_str, homepage_is_submitted, *_): + if homepage_is_submitted: + if nb_intervals_str and not is_error( + get_genomic_intervals_from_input(nb_intervals_str)): + return [html.B('Your Input Intervals: '), html.Span(nb_intervals_str)] + else: + return None + + raise PreventUpdate + + @app.callback( + Output('lift-over-is-submitted', 'data', allow_duplicate=True), + Output('lift-over-other-refs-submitted-input', + 'data', allow_duplicate=True), + Output('lift-over-active-tab', 'data', allow_duplicate=True), + Output('lift-over-active-filter', 'data', allow_duplicate=True), + Input('lift-over-submit', 'n_clicks'), + State('homepage-is-submitted', 'data'), + State('lift-over-other-refs', 'value'), + prevent_initial_call=True + ) + def submit_lift_over_input(lift_over_submit_n_clicks, homepage_is_submitted, other_refs): + if homepage_is_submitted and lift_over_submit_n_clicks >= 1: + other_refs = sanitize_other_refs(other_refs) + + return True, other_refs, None, None + + raise PreventUpdate + + @app.callback( + Output('lift-over-results-container', 'style'), + Input('lift-over-is-submitted', 'data'), + ) + def display_lift_over_output(lift_over_is_submitted): + if lift_over_is_submitted: + return {'display': 'block'} + + else: + return {'display': 'none'} + + @app.callback( + Output('lift-over-results-intro', 'children'), + Output('lift-over-results-tabs', 'children'), + + Output('lift-over-overlap-table-filter', 'options'), + Output('lift-over-overlap-table-filter', 'value'), + + State('homepage-genomic-intervals-submitted-input', 'data'), + Input('lift-over-other-refs-submitted-input', 'data'), + + State('homepage-is-submitted', 'data'), + + State('lift-over-active-filter', 'data'), + State('lift-over-is-submitted', 'data') + ) + def display_gene_tabs(nb_intervals_str, other_refs, homepage_is_submitted, active_filter, lift_over_is_submitted): + if homepage_is_submitted and lift_over_is_submitted: + if nb_intervals_str and not is_error(get_genomic_intervals_from_input(nb_intervals_str)): + tabs = get_tabs() + + other_refs = sanitize_other_refs(other_refs) + + if other_refs: + tabs = tabs + other_refs + + tabs_children = [dcc.Tab(label=tab, value=tab) if idx < len(get_tabs()) + else dcc.Tab(label=f'Unique to {tab}', value=tab) + for idx, tab in enumerate(tabs)] + + if not active_filter: + active_filter = tabs[len(get_tabs()) - 1:] + + gene_list_msg = [html.Span( + 'The tabs below show the implicated genes in '), html.B('Nipponbare')] + + if other_refs: + other_refs_str = other_refs[0] + if len(other_refs) == 2: + other_refs_str += f' and {other_refs[1]}' + elif len(other_refs) > 2: + for idx, other_ref in enumerate(other_refs[1:]): + if idx != len(other_refs) - 2: + other_refs_str += f', ' + else: + other_refs_str += f', and ' + + other_refs_str += f'{other_ref} ({other_ref_genomes[other_ref]})' + + gene_list_msg += [html.Span(' and in orthologous regions of '), + html.B(other_refs_str)] + + gene_list_msg += [html.Span(':')] + + return gene_list_msg, tabs_children, tabs[len(get_tabs()) - 1:], active_filter + else: + return None, None, [], None + + raise PreventUpdate + + @app.callback( + Output('lift-over-results-tabs', 'active_tab'), + State('homepage-is-submitted', 'data'), + State('lift-over-active-tab', 'data'), + State('lift-over-is-submitted', 'data'), + Input('lift-over-other-refs-submitted-input', 'data') + ) + def display_active_tab(homepage_is_submitted, saved_active_tab, lift_over_is_submitted, *_): + if homepage_is_submitted and lift_over_is_submitted: + if not saved_active_tab: + return 'tab-0' + + return saved_active_tab + + raise PreventUpdate + + @app.callback( + Output('lift-over-other-refs-saved-input', + 'data', allow_duplicate=True), + Input('lift-over-other-refs', 'value'), + State('homepage-is-submitted', 'data'), + prevent_initial_call=True + ) + def set_input_lift_over_session_state(other_refs, homepage_is_submitted): + if homepage_is_submitted: + return other_refs + + raise PreventUpdate + + @app.callback( + Output('lift-over-active-tab', 'data', allow_duplicate=True), + Output('lift-over-active-filter', 'data', allow_duplicate=True), + + Input('lift-over-results-tabs', 'active_tab'), + Input('lift-over-overlap-table-filter', 'value'), + + State('homepage-is-submitted', 'data'), + State('lift-over-is-submitted', 'data'), + prevent_initial_call=True, + ) + def get_submitted_lift_over_session_state(active_tab, filter_rice_variants, homepage_is_submitted, lift_over_is_submitted): + if homepage_is_submitted and lift_over_is_submitted: + return active_tab, filter_rice_variants + + raise PreventUpdate + + @app.callback( + Output('lift-over-other-refs', 'value'), + State('lift-over-other-refs', 'multi'), + State('homepage-is-submitted', 'data'), + State('lift-over-other-refs-saved-input', 'data'), + Input('homepage-genomic-intervals-submitted-input', 'data'), + Input('lift-over-submit', 'n_clicks') + ) + def get_input_lift_over_session_state(is_multi_other_refs, homepage_is_submitted, other_refs, *_): + if homepage_is_submitted: + if not is_multi_other_refs and other_refs: + other_refs = other_refs[0] + + return other_refs + + raise PreventUpdate + + @app.callback( + Output('lift-over-results-gene-intro', 'children'), + Output('lift-over-overlap-table-filter', 'style'), + + Input('lift-over-results-tabs', 'active_tab'), + State('lift-over-results-tabs', 'children'), + State('homepage-is-submitted', 'data'), + State('lift-over-is-submitted', 'data') + ) + def display_gene_intro(active_tab, children, homepage_is_submitted, lift_over_is_submitted): + if homepage_is_submitted and lift_over_is_submitted: + if active_tab == get_tab_id('All Genes'): + return 'The table below lists all the implicated genes.', {'display': 'none'} + + elif active_tab == get_tab_id('Common Genes'): + return 'The table below lists the implicated genes that are common to:', {'display': 'block'} + + elif active_tab == get_tab_id('Nipponbare'): + return 'The table below lists the genes overlapping the site in the Nipponbare reference.', {'display': 'none'} + + else: + tab_number = get_tab_index(active_tab) + other_ref = children[tab_number]['props']['value'] + + return f'The table below lists the genes from homologous regions in {other_ref} that are not in Nipponbare.', {'display': 'none'} + + raise PreventUpdate + + @app.callback( + Output('lift-over-results-statistics', 'children'), + Output('lift-over-results-tabs', 'className'), + + Input('homepage-genomic-intervals-submitted-input', 'data'), + Input('lift-over-other-refs-submitted-input', 'data'), + + State('homepage-is-submitted', 'data'), + State('lift-over-is-submitted', 'data') + ) + def display_gene_statistics(nb_intervals_str, other_refs, homepage_is_submitted, lift_over_is_submitted): + if homepage_is_submitted and lift_over_is_submitted: + nb_intervals = get_genomic_intervals_from_input( + nb_intervals_str) + + genes_from_Nb_raw = get_genes_in_Nb(nb_intervals)[0] + + num_unique_genes = get_num_unique_entries( + genes_from_Nb_raw, 'OGI') + if num_unique_genes == 1: + gene_statistics_nb = f'{num_unique_genes} gene was found in Nipponbare' + else: + gene_statistics_nb = f'{num_unique_genes} genes were found in Nipponbare' + + for idx, other_ref in enumerate(other_refs): + common_genes_raw = get_common_genes([other_ref], nb_intervals) + num_unique_genes = get_num_unique_entries( + common_genes_raw, 'OGI') + if idx == len(other_refs) - 1: + if num_unique_genes == 1: + gene_statistics_nb += f', and {num_unique_genes} gene in {other_ref}' + else: + gene_statistics_nb += f', and {num_unique_genes} genes in {other_ref}' + else: + if num_unique_genes == 1: + gene_statistics_nb += f', {num_unique_genes} gene in {other_ref}' + else: + gene_statistics_nb += f', {num_unique_genes} genes in {other_ref}' + + gene_statistics_nb += '. ' + gene_statistics_items = [html.Li(gene_statistics_nb)] + + if other_refs: + other_refs.append('Nipponbare') + genes_common = get_common_genes(other_refs, nb_intervals) + num_unique_genes = get_num_unique_entries(genes_common, 'OGI') + + if num_unique_genes == 1: + gene_statistics_common = f'Among these, {num_unique_genes} gene is common to all cultivars.' + else: + gene_statistics_common = f'Among these, {num_unique_genes} genes are common to all cultivars.' + + gene_statistics_items.append( + html.Li(gene_statistics_common)) + + gene_statistics_other_ref = f'' + other_refs.pop() # Remove added Nipponbare + for idx, other_ref in enumerate(other_refs): + genes_from_other_ref_raw = get_unique_genes_in_other_ref( + other_ref, nb_intervals) + + if len(other_refs) > 1 and idx == len(other_refs) - 1: + gene_statistics_other_ref += f', and ' + elif idx != 0: + gene_statistics_other_ref += f', ' + + num_unique_genes = get_num_unique_entries( + genes_from_other_ref_raw, 'OGI') + + if num_unique_genes == 1: + gene_statistics_other_ref += f'{num_unique_genes} gene is unique to {other_ref}' + else: + gene_statistics_other_ref += f'{num_unique_genes} genes are unique to {other_ref}' + + gene_statistics_other_ref += '.' + gene_statistics_items.append( + html.Li(gene_statistics_other_ref)) + + # Setting the class name of lift-over-results-tabs to None is for removing the top margin during loading + return gene_statistics_items, None + + raise PreventUpdate + + @app.callback( + Output('lift-over-results-table', 'columns'), + Output('lift-over-results-table', 'data'), + + Input('homepage-genomic-intervals-submitted-input', 'data'), + Input('lift-over-results-tabs', 'active_tab'), + Input('lift-over-overlap-table-filter', 'value'), + Input('lift-over-other-refs-submitted-input', 'data'), + + State('lift-over-results-tabs', 'children'), + State('homepage-is-submitted', 'data'), + State('lift-over-is-submitted', 'data') + ) + def display_gene_tables(nb_intervals_str, active_tab, filter_rice_variants, other_refs, children, homepage_is_submitted, lift_over_is_submitted): + if homepage_is_submitted and lift_over_is_submitted: + nb_intervals = get_genomic_intervals_from_input( + nb_intervals_str) + + if active_tab == get_tab_id('All Genes'): + all_genes_raw = get_all_genes(other_refs, nb_intervals) + + mask = (all_genes_raw['OGI'] != NULL_PLACEHOLDER) + all_genes_raw.loc[mask, 'OGI'] = get_rgi_orthogroup_link( + all_genes_raw, 'OGI') + if 'Nipponbare' in all_genes_raw.columns: + mask = (all_genes_raw['Nipponbare'] != NULL_PLACEHOLDER) + all_genes_raw.loc[mask, 'Nipponbare'] = get_rgi_genecard_link( + all_genes_raw, 'Nipponbare') + + for cultivar in other_ref_genomes: + if cultivar in all_genes_raw.columns: + mask = (all_genes_raw[cultivar] != NULL_PLACEHOLDER) + all_genes_raw.loc[mask, cultivar] = get_rgi_genecard_link( + all_genes_raw, cultivar) + + all_genes = all_genes_raw.to_dict('records') + + columns = [{'id': x, 'name': x, 'presentation': 'markdown'} + for x in all_genes_raw.columns] + + return columns, all_genes + + elif active_tab == get_tab_id('Common Genes'): + common_genes_raw = get_common_genes( + filter_rice_variants, nb_intervals) + + # Mask will be triggered if no cultivar is selected + mask = (common_genes_raw['OGI'] != NULL_PLACEHOLDER) + common_genes_raw.loc[mask, 'OGI'] = get_rgi_orthogroup_link( + common_genes_raw, 'OGI') + + if 'Nipponbare' in common_genes_raw.columns: + mask = (common_genes_raw['Nipponbare'] != NULL_PLACEHOLDER) + common_genes_raw.loc[mask, 'Nipponbare'] = get_rgi_genecard_link( + common_genes_raw, 'Nipponbare') + + for cultivar in other_ref_genomes: + if cultivar in common_genes_raw.columns: + mask = (common_genes_raw[cultivar] != NULL_PLACEHOLDER) + common_genes_raw.loc[mask, cultivar] = get_rgi_genecard_link( + common_genes_raw, cultivar) + + common_genes = common_genes_raw.to_dict('records') + + columns = [{'id': x, 'name': x, 'presentation': 'markdown'} + for x in common_genes_raw.columns] + + return columns, common_genes + + elif active_tab == get_tab_id('Nipponbare'): + genes_from_Nb_raw = get_genes_in_Nb( + nb_intervals)[0].drop( + ['Chromosome', 'Start', 'End', 'Strand'], axis=1) + + mask = (genes_from_Nb_raw['OGI'] != NULL_PLACEHOLDER) + genes_from_Nb_raw.loc[mask, 'OGI'] = get_rgi_orthogroup_link( + genes_from_Nb_raw, 'OGI') + + mask = (genes_from_Nb_raw['Name'] != NULL_PLACEHOLDER) + genes_from_Nb_raw.loc[mask, 'Name'] = get_rgi_genecard_link( + genes_from_Nb_raw, 'Name') + + genes_from_Nb = genes_from_Nb_raw.to_dict('records') + + columns = [{'id': x, 'name': x, 'presentation': 'markdown'} + for x in genes_from_Nb_raw.columns] + + return columns, genes_from_Nb + + else: + tab_number = get_tab_index(active_tab) + other_ref = children[tab_number]['props']['value'] + + genes_from_other_ref_raw = get_unique_genes_in_other_ref( + other_ref, nb_intervals) + + mask = (genes_from_other_ref_raw['OGI'] != NULL_PLACEHOLDER) + genes_from_other_ref_raw.loc[mask, 'OGI'] = get_rgi_orthogroup_link( + genes_from_other_ref_raw, 'OGI') + + mask = (genes_from_other_ref_raw['Name'] != NULL_PLACEHOLDER) + genes_from_other_ref_raw.loc[mask, 'Name'] = get_rgi_genecard_link( + genes_from_other_ref_raw, 'Name') + + genes_from_other_ref = genes_from_other_ref_raw.to_dict( + 'records') + + columns = [{'id': x, 'name': x, 'presentation': 'markdown'} + for x in genes_from_other_ref_raw.columns] + + return columns, genes_from_other_ref + + raise PreventUpdate + + @app.callback( + Output('lift-over-results-table', 'filter_query'), + + Input('lift-over-reset-table', 'n_clicks'), + Input('lift-over-results-tabs', 'active_tab'), + Input('lift-over-overlap-table-filter', 'value') + ) + def reset_table_filters(*_): + return '' + + @app.callback( + Output('lift-over-download-df-to-csv', 'data'), + Input('lift-over-export-table', 'n_clicks'), + State('lift-over-results-table', 'data'), + State('homepage-genomic-intervals-submitted-input', 'data') + ) + def download_lift_over_table_to_csv(download_n_clicks, lift_over_df, genomic_intervals): + if download_n_clicks >= 1: + df = pd.DataFrame(lift_over_df) + return dcc.send_data_frame(df.to_csv, f'[{genomic_intervals}] Gene List and Lift-Over.csv', index=False) + + raise PreventUpdate diff --git a/callbacks/lift_over/util.py b/callbacks/lift_over/util.py index b2e4252b..7d6cc724 100644 --- a/callbacks/lift_over/util.py +++ b/callbacks/lift_over/util.py @@ -1,692 +1,692 @@ -import pickle -from collections import defaultdict, namedtuple - -import gffutils -import pandas as pd - -from ..constants import Constants -from ..general_util import * -from ..links_util import * - - -const = Constants() -Genomic_interval = namedtuple('Genomic_interval', ['chrom', 'start', 'stop']) - -# Error codes and messages triggered by a malformed genomic interval entered by the user -Error_message = namedtuple('Error_message', ['code', 'message']) -errors = { - 'NO_CHROM_INTERVAL_SEP': Error_message(1, 'A genomic interval should be entered as chrom:start-end. Use a semicolon (;) to separate multiple intervals'), - 'NO_START_STOP_SEP': Error_message(2, 'Specify a valid start and end for the genomic interval'), - 'START_STOP_NOT_INT': Error_message(3, 'The start and end of a genomic interval should be integers'), - 'START_GREATER_THAN_STOP': Error_message(4, 'The start of a genomic interval should not be past the end') -} - -other_ref_genomes = {'N22': 'aus Nagina-22', - 'MH63': 'indica Minghui-63', - 'Azu': 'japonica Azucena', - 'ARC': 'basmati ARC', - 'IR64': 'indica IR64', - 'CMeo': 'japonica CHAO MEO'} - -NB_COLUMNS = ['Name', 'Description', 'UniProtKB/Swiss-Prot', - 'OGI', 'Chromosome', 'Start', 'End', 'Strand', 'QTL Analyses', 'PubMed Article IDs'] -OTHER_REF_COLUMNS = ['OGI', 'Name', 'Chromosome', 'Start', 'End', 'Strand'] -FRONT_FACING_COLUMNS = ['Name', 'Description', 'UniProtKB/Swiss-Prot', 'OGI'] -NO_REFS_COLUMNS = ['OGI'] - - -def construct_options_other_ref_genomes(): - return [ - {'value': symbol, 'label': f'{symbol} ({name})'} for symbol, name in other_ref_genomes.items()] - - -def create_empty_df_nb(): - """ - Returns an empty data frame if there are no results - - Returns: - - Empty data frame - """ - return create_empty_df_with_cols(NB_COLUMNS) - - -def create_empty_no_refs_df(): - return create_empty_df_with_cols(NO_REFS_COLUMNS) - - -def create_empty_df_other_refs(): - return create_empty_df_with_cols(OTHER_REF_COLUMNS) - - -def create_empty_front_facing_df(): - return create_empty_df_with_cols(FRONT_FACING_COLUMNS) - -# ===================================================== -# Utility functions for parsing input genomic interval -# ===================================================== - - -def is_error(genomic_interval): - """ - Returns True if given genomic interval is malformed; False, otherwise - - This function assumes that genomic_interval is the return value of to_genomic_interval() - - Parameters: - - genomic_interval: If its first element is an integer (i.e., the error code), - then the given genomic interval is malformed - - Returns: - - True if given genomic interval is malformed; False, otherwise - """ - return isinstance(genomic_interval[0], int) - - -def get_error_message(error_code): - """ - Returns the message associated with the error code if the user inputs a malformed genomic interval - - Parameters: - - error_code: Error code triggered by the malformed genomic interval - - Returns: - - Message associated with the given error code - """ - for _, code_message in errors.items(): - if code_message.code == error_code: - return code_message.message - - -def is_one_digit_chromosome(chromosome): - """ - Checks if given chromosome only has a single digit (e.g., Chr1, Chr2) - - Parameters: - - chromosome: Chromosome to be checked - - Returns: - - True if given chromosome only has a single digit; False, otherwise - """ - # Examples: Chr1, Chr2 - return len(chromosome) == len('Chr') + 1 - - -def pad_one_digit_chromosome(chromosome): - """ - Prepends a 0 to the chromosome number if it only has a single digit - For example, if the input is 'Chr1', it returns 'Chr01' - - This function assumes that the given chromosome only has a single digit - - Parameters: - - chromosome: Chromosome to be padded - - Returns: - - Chromosome with a leading 0 prepended - """ - return chromosome[:-1] + '0' + chromosome[-1] - - -def to_genomic_interval(genomic_interval_str): - """ - Converts a genomic interval extracted from the user input into a Genomic_interval tuple - If the genomic interval is malformed, it returns the error code, alongside the genomic interval - - Parameters: - - genomic_interval_str: Genomic interval extracted from the user input - - Returns: - - If the genomic interval is valid: Genomic_interval tuple - - Otherwise: Tuple containing the triggered error code and the genomic interval - """ - try: - chrom, interval = genomic_interval_str.split(":") - if is_one_digit_chromosome(chrom): - chrom = pad_one_digit_chromosome(chrom) - - except ValueError: - return errors['NO_CHROM_INTERVAL_SEP'].code, genomic_interval_str - - try: - start, stop = interval.split("-") - except ValueError: - return errors['NO_START_STOP_SEP'].code, genomic_interval_str - - try: - start = int(start) - stop = int(stop) - except ValueError: - return errors['START_STOP_NOT_INT'].code, genomic_interval_str - - if start > stop: - return errors['START_GREATER_THAN_STOP'].code, genomic_interval_str - - return Genomic_interval(chrom, start, stop) - - -def sanitize_nb_intervals_str(nb_intervals_str): - """ - Sanitizes the genomic intervals entered by the user by removing spaces and removing trailing semicolons - - Parameters: - - nb_intervals_str: Genomic intervals entered by the user - - Returns: - - Sanitized genomic interval - """ - nb_intervals_str = nb_intervals_str.replace(' ', '') - nb_intervals_str = nb_intervals_str.rstrip(';') - - return nb_intervals_str - - -def get_genomic_intervals_from_input(nb_intervals_str): - """ - Extracts the Genomic_interval tuples from the genomic intervals entered by the user - - Parameters: - - nb_intervals_str: Genomic intervals entered by the user - - Returns: - - List of Genomic_interval tuples - """ - nb_intervals_str = sanitize_nb_intervals_str(nb_intervals_str) - nb_intervals = [] - - nb_intervals_split = nb_intervals_str.split(";") - - for interval_str in nb_intervals_split: - interval = to_genomic_interval(interval_str) - - # Trap if at least one of the genomic intervals is malformed - if is_error(interval): - return interval - else: - nb_intervals.append(interval) - - return nb_intervals - -# ============================================================================ -# Utility functions for displaying lift-over results and sanitizng accessions -# ============================================================================ - - -def get_tabs(): - """ - Returns the tabs to be displayed in the liftover results - The tabs do not include those that are specific to a reference - - Returns: - - Tabs to be displayed in the liftover results (except those specific to a reference) - """ - return ['All Genes', 'Common Genes', 'Nipponbare'] - - -def get_tab_id(tab): - """ - Returns the index of given tab with respect to the tabs to be displayed in the liftover results - - Parameters: - - tab: Tab whose idnex is to be returned - - Returns: - - Index of given tab with respect to the tabs to be displayed in the liftover results - """ - return f'tab-{get_tabs().index(tab)}' - - -def sanitize_other_refs(other_refs): - """ - Returns the references (other than Nipponbare) selected by the user - - The need for this function is motivated by the fact that, when the user only chooses one reference, - the data type of this chosen value is string (not list) - - Parameters: - - other_refs: References (other than Nipponbare) selected by the user - - Returns: - - List of references (other than Nipponbare) selected by the user - """ - if other_refs: - if isinstance(other_refs, str): - return [other_refs] - else: - return other_refs - - return [] - - -def sanitize_gene_id(gene_id): - """ - Removes "gene:" prefix in given accession - - Parameters: - - gene_id: Accession - - Returns: - - Accession without the "gene:" prefix - """ - if gene_id[:len('gene:')] == 'gene:': - return gene_id[len('gene:'):] - - return gene_id - - -# =============================================== -# Utility functions for OGI-to-reference mapping -# =============================================== - - -def get_ogi_list(accession_ids, ogi_mapping): - """ - Returns the list of equivalent OGIs of given accessions - - Parameters: - - accession_ids: Accessions - - ogi_mapping: OGI-to-accession mapping dictionary - - Returns: - - list of equivalent OGIs of given accessions - """ - ogi_list = [ogi_mapping[accession_id] for accession_id in accession_ids] - - return ogi_list - - -def get_ogi_nb(nb_intervals): - """ - Maps Nipponbare accessions (obtained from a list of Genomic_interval tuples) to their respective OGIs - - Parameters: - - nb_intervals: List of Genomic_interval tuples - - Returns: - - Set containing all unique OGIs after performing OGI-to-Nipponbare mapping - - OGI-to-Nipponbare mapping dictionary - """ - - # All unique OGIs - final_ogi_set = set() - - # OGI-to-NB mapping dictionary (one OGI can map to multiple NB accessions) - final_ogi_dict = defaultdict(set) - - for nb_interval in nb_intervals: - # Load and search GFF_DB of Nipponbare - db = gffutils.FeatureDB( - f'{const.ANNOTATIONS}/Nb/IRGSPMSU.gff.db', keep_order=True) - genes_in_interval = list(db.region(region=(nb_interval.chrom, nb_interval.start, nb_interval.stop), - completely_within=False, featuretype='gene')) - - # Map Nipponbare accessions to OGIs - ogi_mapping_path = f'{const.OGI_MAPPING}/Nb_to_ogi.pickle' - with open(ogi_mapping_path, 'rb') as f: - ogi_mapping = pickle.load(f) - for gene in genes_in_interval: - gene_id = sanitize_gene_id(gene.id) - ogi = ogi_mapping[gene_id] - - final_ogi_set.add(ogi) - final_ogi_dict[ogi].add(gene_id) - - return final_ogi_set, final_ogi_dict - - -def get_ogi_other_ref(ref, nb_intervals): - """ - Maps reference-specific accessions (obtained from a list of Genomic_interval tuples) to their respective OGIs - "Reference" refers to a reference other than Nipponbare - Nipponbare reference is handled by get_ogi_nb() - - Parameters: - - ref: Reference - - nb_intervals: List of Genomic_interval tuples - - Returns: - - Set containing all unique OGIs after performing OGI-to-reference mapping - - OGI-to-reference mapping dictionary - """ - - # All unique OGIs - final_ogi_set = set() - - # OGI-to-NB mapping dictionary (one OGI can map to multiple NB accessions) - final_ogi_dict = defaultdict(set) - - # Get intervals from other refs that align to (parts) of the input loci - db_align = gffutils.FeatureDB( - f'{const.ALIGNMENTS}/{"Nb_"+str(ref)}/{"Nb_"+str(ref)}.gff.db') - - # Get corresponding intervals on ref - db_annotation = gffutils.FeatureDB( - f"{const.ANNOTATIONS}/{ref}/{ref}.gff.db".format(ref)) - - for nb_interval in nb_intervals: - gff_intersections = list(db_align.region(region=(nb_interval.chrom, nb_interval.start, nb_interval.stop), - completely_within=False)) - for intersection in gff_intersections: - ref_interval = to_genomic_interval( - intersection.attributes['Name'][0]) - genes_in_interval = list(db_annotation.region(region=(ref_interval.chrom, ref_interval.start, ref_interval.stop), - completely_within=False, featuretype='gene')) - - # Map reference-specific accessions to OGIs - ogi_mapping_path = f'{const.OGI_MAPPING}/{ref}_to_ogi.pickle' - with open(ogi_mapping_path, 'rb') as f: - ogi_mapping = pickle.load(f) - for gene in genes_in_interval: - gene_id = sanitize_gene_id(gene.id) - ogi = ogi_mapping[gene_id] - - final_ogi_set.add(ogi) - final_ogi_dict[ogi].add(gene_id) - - return final_ogi_set, final_ogi_dict - -# ================================================== -# Utility function related to QTARO and Text Mining -# ================================================== - - -def get_qtaro_entry(mapping, gene): - try: - qtaro_str = '
    ' - pub_idx = 1 - for character_major in mapping[gene]: - qtaro_str += '
  • ' + character_major + '
      ' - for character_minor in mapping[gene][character_major]: - pubs = [] - for pub in mapping[gene][character_major][character_minor]: - pubs.append( - '
    • ' + get_doi_link_single_str(pub, pub_idx) + '
    • ') - pub_idx += 1 - - qtaro_str += '
    • ' + character_minor + \ - '
        ' + ''.join(pubs) + '
    • ' - qtaro_str += '

  • ' - - # Remove the line break after the last character major - return qtaro_str[:-len("
    ")] + '
' - except KeyError: - return NULL_PLACEHOLDER - - -def get_qtaro_entries(mapping, genes): - return [get_qtaro_entry(mapping, gene) for gene in genes] - - -def get_pubmed_entry(gene): - try: - with open(f'{const.TEXT_MINING_PUBMED}/{gene}.pickle', 'rb') as f: - mapping = pickle.load(f) - - pubmed_ids = [get_pubmed_link_single_str(pubmed_id[0]) for pubmed_id in sorted( - mapping.items(), key=lambda x: x[1], reverse=True)] - except FileNotFoundError: - return NULL_PLACEHOLDER - - pubmed_str = '' - for idx, pubmed in enumerate(pubmed_ids): - if idx % 2 == 0: - pubmed_str += f'{pubmed}   ' - else: - pubmed_str += f'{pubmed}\n' - - if pubmed_str[-1] == '\n': # Ends in a newline - return pubmed_str[:-len('\n')] - - return pubmed_str[:-len('   ')] - - -# ======================== -# Functions for lift-over -# ======================== - - -def get_genes_in_Nb(nb_intervals): - """ - Returns a data frame containing the genes in Nipponbare - - Parameters: - - nb_intervals: List of Genomic_interval tuples - - Returns: - - Data frame containing the genes in Nipponbare - """ - dfs = [] - - for nb_interval in nb_intervals: - # Load and search GFF_DB of Nipponbare - db = gffutils.FeatureDB( - f'{const.ANNOTATIONS}/Nb/IRGSPMSU.gff.db', keep_order=True) - genes_in_interval = list(db.region(region=(nb_interval.chrom, nb_interval.start, nb_interval.stop), - completely_within=False, featuretype='gene')) - - # Map accessions to their respective OGIs - ogi_mapping_path = f'{const.OGI_MAPPING}/Nb_to_ogi.pickle' - ogi_list = [] - with open(ogi_mapping_path, 'rb') as f: - ogi_mapping = pickle.load(f) - ogi_list = get_ogi_list([sanitize_gene_id(gene.id) - for gene in genes_in_interval], ogi_mapping) - - # Get QTARO annotations - with open(const.QTARO_DICTIONARY, 'rb') as f: - qtaro_dict = pickle.load(f) - qtaro_list = get_qtaro_entries( - qtaro_dict, [gene.id for gene in genes_in_interval]) - - pubmed_ids = [get_pubmed_entry(gene.id) for gene in genes_in_interval] - - # Construct the data frame - df = pd.DataFrame({ - 'OGI': ogi_list, - 'Name': [gene.id for gene in genes_in_interval], - 'Chromosome': [gene.chrom for gene in genes_in_interval], - 'Start': [gene.start for gene in genes_in_interval], - 'End': [gene.end for gene in genes_in_interval], - 'Strand': [gene.strand for gene in genes_in_interval], - 'QTL Analyses': qtaro_list, - 'PubMed Article IDs': pubmed_ids - }) - - dfs.append(df) - - try: - table_gene_ids = pd.concat(dfs, ignore_index=True) - # Read in dataframe containing gene descriptions - gene_description_df = pd.read_csv( - f'{const.GENE_DESCRIPTIONS}/Nb/Nb_gene_descriptions.csv') - # Right merge because some genes do not have descriptions or UniProtKB/Swiss-Prot IDs - table = pd.merge(gene_description_df, table_gene_ids, - left_on='Gene_ID', right_on='Name', how='right') - - # Reorder columns - table = table[NB_COLUMNS] - - table['UniProtKB/Swiss-Prot'] = get_uniprot_link( - table, 'UniProtKB/Swiss-Prot') - - table = table.fillna(NULL_PLACEHOLDER) - - if table.shape[0] == 0: - return create_empty_df_nb(), table['Name'].values.tolist() - - return table, table['Name'].values.tolist() - - except ValueError: # No results to concatenate - return create_empty_df_nb(), table['Name'].values.tolist() - - -def get_genes_in_other_ref(ref, nb_intervals): - """ - Returns a data frame containing the genes in references other than Nipponbare - Nipponbare is handled by get_genes_in_Nb() - - Parameters: - - ref: Reference - - nb_intervals: List of Genomic_interval tuples - - Returns: - - Data frame containing the genes in references other than Nipponbare - """ - - # Get intervals from other refs that align to (parts) of the input loci - db_align = gffutils.FeatureDB( - f'{const.ALIGNMENTS}/{"Nb_"+str(ref)}/{"Nb_"+str(ref)}.gff.db') - - # Get corresponding intervals on ref - db_annotation = gffutils.FeatureDB( - f"{const.ANNOTATIONS}/{ref}/{ref}.gff.db") - - dfs = [] - - for nb_interval in nb_intervals: - gff_intersections = list(db_align.region(region=(nb_interval.chrom, nb_interval.start, nb_interval.stop), - completely_within=False)) - for intersection in gff_intersections: - ref_interval = to_genomic_interval( - intersection.attributes['Name'][0]) - genes_in_interval = list(db_annotation.region(region=(ref_interval.chrom, ref_interval.start, ref_interval.stop), - completely_within=False, featuretype='gene')) - - # Map accessions to their respective OGIs - ogi_mapping_path = f'{const.OGI_MAPPING}/{ref}_to_ogi.pickle' - ogi_list = [] - with open(ogi_mapping_path, 'rb') as f: - ogi_mapping = pickle.load(f) - ogi_list = get_ogi_list([sanitize_gene_id(gene.id) - for gene in genes_in_interval], ogi_mapping) - - # Construct the data frame - df = pd.DataFrame({ - 'OGI': ogi_list, - 'Name': [sanitize_gene_id(gene.id) for gene in genes_in_interval], - 'Chromosome': [gene.chrom for gene in genes_in_interval], - 'Start': [gene.start for gene in genes_in_interval], - 'End': [gene.end for gene in genes_in_interval], - 'Strand': [gene.strand for gene in genes_in_interval] - }) - dfs.append(df) - - try: - table = pd.concat(dfs, ignore_index=True) - if table.shape[0] == 0: - return create_empty_df_other_refs() - - return table - - except ValueError: # No results to concatenate - return create_empty_df_other_refs() - - -def get_common_genes(refs, nb_intervals): - """ - Returns a data frame containing the genes common to the given references - - Parameters: - - ref: References - - nb_intervals: List of Genomic_interval tuples - - Returns: - - Data frame containing the genes common to the given references - """ - # No cultivars selected - if not refs: - return create_empty_no_refs_df() - - common_genes = None - for ref in refs: - if ref != 'Nipponbare': - genes_in_ref = get_genes_in_other_ref(ref, nb_intervals) - else: - genes_in_ref = get_genes_in_Nb(nb_intervals)[0] - - genes_in_ref = genes_in_ref[['OGI', 'Name']] - - try: - common_genes = pd.merge( - common_genes, genes_in_ref, on='OGI') - # First instance of merging (that is, common_genes is still None) - except TypeError: - common_genes = genes_in_ref - - common_genes = common_genes.rename( - columns={'Name_x': 'Nipponbare', 'Name_y': ref, 'Name': ref}) - - common_genes = common_genes.rename( - columns={'Name': 'Nipponbare'}).dropna().drop_duplicates() - - return common_genes - - -def get_all_genes(refs, nb_intervals): - """ - Returns a data frame containing all the genes (i.e., the set-theoretic union of all the genes) - in Nipponbare, as well as orthologous genes in the given references - - Parameters: - - ref: References (other than Nipponbare) - - nb_intervals: List of Genomic_interval tuples - - Returns: - - Data frame containing all the genes - """ - genes_in_nb = get_genes_in_Nb(nb_intervals)[0] - genes_in_nb = genes_in_nb[['OGI', 'Name']] - - common_genes = genes_in_nb - for ref in refs: - if ref != 'Nipponbare': - genes_in_other_ref = get_genes_in_other_ref(ref, nb_intervals) - genes_in_other_ref = genes_in_other_ref[['OGI', 'Name']] - common_genes = pd.merge( - common_genes, genes_in_other_ref, on='OGI', how='outer') - - common_genes = common_genes.rename( - columns={'Name_x': 'Nipponbare', 'Name_y': ref, 'Name': ref}) - - common_genes = common_genes.rename( - columns={'Name': 'Nipponbare'}).fillna(NULL_PLACEHOLDER).drop_duplicates() - - return common_genes - - -def get_unique_genes_in_other_ref(ref, nb_intervals): - """ - Returns a data frame containing the genes in a reference that are not present in Nipponbare - - Parameters: - - ref: References - - nb_intervals: List of Genomic_interval tuples - - Returns: - - Data frame containing the genes in a reference that are not present in Nipponbare - """ - genes_in_nb = get_genes_in_Nb(nb_intervals)[0] - genes_in_other_ref = get_genes_in_other_ref(ref, nb_intervals) - - genes_in_nb = genes_in_nb[['OGI']] - - # Get set difference - unique_genes = pd.concat([genes_in_other_ref, genes_in_nb, genes_in_nb]).drop_duplicates( - subset=['OGI'], keep=False) - - gene_description_df = pd.read_csv( - f'{const.GENE_DESCRIPTIONS}/{ref}/{ref}_gene_descriptions.csv') - # Right merge because some genes do not have descriptions or UniProtKB/Swiss-Prot IDs - unique_genes = pd.merge(gene_description_df, unique_genes, - left_on='Gene_ID', right_on='Name', how='right') - - unique_genes = unique_genes[FRONT_FACING_COLUMNS] - - unique_genes['UniProtKB/Swiss-Prot'] = get_uniprot_link( - unique_genes, 'UniProtKB/Swiss-Prot') - - unique_genes = unique_genes.fillna(NULL_PLACEHOLDER) - - if unique_genes.shape[0] == 0: - return create_empty_front_facing_df() - - return unique_genes +import pickle +from collections import defaultdict, namedtuple + +import gffutils +import pandas as pd + +from ..constants import Constants +from ..general_util import * +from ..links_util import * + + +const = Constants() +Genomic_interval = namedtuple('Genomic_interval', ['chrom', 'start', 'stop']) + +# Error codes and messages triggered by a malformed genomic interval entered by the user +Error_message = namedtuple('Error_message', ['code', 'message']) +errors = { + 'NO_CHROM_INTERVAL_SEP': Error_message(1, 'A genomic interval should be entered as chrom:start-end. Use a semicolon (;) to separate multiple intervals'), + 'NO_START_STOP_SEP': Error_message(2, 'Specify a valid start and end for the genomic interval'), + 'START_STOP_NOT_INT': Error_message(3, 'The start and end of a genomic interval should be integers'), + 'START_GREATER_THAN_STOP': Error_message(4, 'The start of a genomic interval should not be past the end') +} + +other_ref_genomes = {'N22': 'aus Nagina-22', + 'MH63': 'indica Minghui-63', + 'Azu': 'japonica Azucena', + 'ARC': 'basmati ARC', + 'IR64': 'indica IR64', + 'CMeo': 'japonica CHAO MEO'} + +NB_COLUMNS = ['Name', 'Description', 'UniProtKB/Swiss-Prot', + 'OGI', 'Chromosome', 'Start', 'End', 'Strand', 'QTL Analyses', 'PubMed Article IDs'] +OTHER_REF_COLUMNS = ['OGI', 'Name', 'Chromosome', 'Start', 'End', 'Strand'] +FRONT_FACING_COLUMNS = ['Name', 'Description', 'UniProtKB/Swiss-Prot', 'OGI'] +NO_REFS_COLUMNS = ['OGI'] + + +def construct_options_other_ref_genomes(): + return [ + {'value': symbol, 'label': f'{symbol} ({name})'} for symbol, name in other_ref_genomes.items()] + + +def create_empty_df_nb(): + """ + Returns an empty data frame if there are no results + + Returns: + - Empty data frame + """ + return create_empty_df_with_cols(NB_COLUMNS) + + +def create_empty_no_refs_df(): + return create_empty_df_with_cols(NO_REFS_COLUMNS) + + +def create_empty_df_other_refs(): + return create_empty_df_with_cols(OTHER_REF_COLUMNS) + + +def create_empty_front_facing_df(): + return create_empty_df_with_cols(FRONT_FACING_COLUMNS) + +# ===================================================== +# Utility functions for parsing input genomic interval +# ===================================================== + + +def is_error(genomic_interval): + """ + Returns True if given genomic interval is malformed; False, otherwise + + This function assumes that genomic_interval is the return value of to_genomic_interval() + + Parameters: + - genomic_interval: If its first element is an integer (i.e., the error code), + then the given genomic interval is malformed + + Returns: + - True if given genomic interval is malformed; False, otherwise + """ + return isinstance(genomic_interval[0], int) + + +def get_error_message(error_code): + """ + Returns the message associated with the error code if the user inputs a malformed genomic interval + + Parameters: + - error_code: Error code triggered by the malformed genomic interval + + Returns: + - Message associated with the given error code + """ + for _, code_message in errors.items(): + if code_message.code == error_code: + return code_message.message + + +def is_one_digit_chromosome(chromosome): + """ + Checks if given chromosome only has a single digit (e.g., Chr1, Chr2) + + Parameters: + - chromosome: Chromosome to be checked + + Returns: + - True if given chromosome only has a single digit; False, otherwise + """ + # Examples: Chr1, Chr2 + return len(chromosome) == len('Chr') + 1 + + +def pad_one_digit_chromosome(chromosome): + """ + Prepends a 0 to the chromosome number if it only has a single digit + For example, if the input is 'Chr1', it returns 'Chr01' + + This function assumes that the given chromosome only has a single digit + + Parameters: + - chromosome: Chromosome to be padded + + Returns: + - Chromosome with a leading 0 prepended + """ + return chromosome[:-1] + '0' + chromosome[-1] + + +def to_genomic_interval(genomic_interval_str): + """ + Converts a genomic interval extracted from the user input into a Genomic_interval tuple + If the genomic interval is malformed, it returns the error code, alongside the genomic interval + + Parameters: + - genomic_interval_str: Genomic interval extracted from the user input + + Returns: + - If the genomic interval is valid: Genomic_interval tuple + - Otherwise: Tuple containing the triggered error code and the genomic interval + """ + try: + chrom, interval = genomic_interval_str.split(":") + if is_one_digit_chromosome(chrom): + chrom = pad_one_digit_chromosome(chrom) + + except ValueError: + return errors['NO_CHROM_INTERVAL_SEP'].code, genomic_interval_str + + try: + start, stop = interval.split("-") + except ValueError: + return errors['NO_START_STOP_SEP'].code, genomic_interval_str + + try: + start = int(start) + stop = int(stop) + except ValueError: + return errors['START_STOP_NOT_INT'].code, genomic_interval_str + + if start > stop: + return errors['START_GREATER_THAN_STOP'].code, genomic_interval_str + + return Genomic_interval(chrom, start, stop) + + +def sanitize_nb_intervals_str(nb_intervals_str): + """ + Sanitizes the genomic intervals entered by the user by removing spaces and removing trailing semicolons + + Parameters: + - nb_intervals_str: Genomic intervals entered by the user + + Returns: + - Sanitized genomic interval + """ + nb_intervals_str = nb_intervals_str.replace(' ', '') + nb_intervals_str = nb_intervals_str.rstrip(';') + + return nb_intervals_str + + +def get_genomic_intervals_from_input(nb_intervals_str): + """ + Extracts the Genomic_interval tuples from the genomic intervals entered by the user + + Parameters: + - nb_intervals_str: Genomic intervals entered by the user + + Returns: + - List of Genomic_interval tuples + """ + nb_intervals_str = sanitize_nb_intervals_str(nb_intervals_str) + nb_intervals = [] + + nb_intervals_split = nb_intervals_str.split(";") + + for interval_str in nb_intervals_split: + interval = to_genomic_interval(interval_str) + + # Trap if at least one of the genomic intervals is malformed + if is_error(interval): + return interval + else: + nb_intervals.append(interval) + + return nb_intervals + +# ============================================================================ +# Utility functions for displaying lift-over results and sanitizng accessions +# ============================================================================ + + +def get_tabs(): + """ + Returns the tabs to be displayed in the liftover results + The tabs do not include those that are specific to a reference + + Returns: + - Tabs to be displayed in the liftover results (except those specific to a reference) + """ + return ['All Genes', 'Common Genes', 'Nipponbare'] + + +def get_tab_id(tab): + """ + Returns the index of given tab with respect to the tabs to be displayed in the liftover results + + Parameters: + - tab: Tab whose idnex is to be returned + + Returns: + - Index of given tab with respect to the tabs to be displayed in the liftover results + """ + return f'tab-{get_tabs().index(tab)}' + + +def sanitize_other_refs(other_refs): + """ + Returns the references (other than Nipponbare) selected by the user + + The need for this function is motivated by the fact that, when the user only chooses one reference, + the data type of this chosen value is string (not list) + + Parameters: + - other_refs: References (other than Nipponbare) selected by the user + + Returns: + - List of references (other than Nipponbare) selected by the user + """ + if other_refs: + if isinstance(other_refs, str): + return [other_refs] + else: + return other_refs + + return [] + + +def sanitize_gene_id(gene_id): + """ + Removes "gene:" prefix in given accession + + Parameters: + - gene_id: Accession + + Returns: + - Accession without the "gene:" prefix + """ + if gene_id[:len('gene:')] == 'gene:': + return gene_id[len('gene:'):] + + return gene_id + + +# =============================================== +# Utility functions for OGI-to-reference mapping +# =============================================== + + +def get_ogi_list(accession_ids, ogi_mapping): + """ + Returns the list of equivalent OGIs of given accessions + + Parameters: + - accession_ids: Accessions + - ogi_mapping: OGI-to-accession mapping dictionary + + Returns: + - list of equivalent OGIs of given accessions + """ + ogi_list = [ogi_mapping[accession_id] for accession_id in accession_ids] + + return ogi_list + + +def get_ogi_nb(nb_intervals): + """ + Maps Nipponbare accessions (obtained from a list of Genomic_interval tuples) to their respective OGIs + + Parameters: + - nb_intervals: List of Genomic_interval tuples + + Returns: + - Set containing all unique OGIs after performing OGI-to-Nipponbare mapping + - OGI-to-Nipponbare mapping dictionary + """ + + # All unique OGIs + final_ogi_set = set() + + # OGI-to-NB mapping dictionary (one OGI can map to multiple NB accessions) + final_ogi_dict = defaultdict(set) + + for nb_interval in nb_intervals: + # Load and search GFF_DB of Nipponbare + db = gffutils.FeatureDB( + f'{const.ANNOTATIONS}/Nb/IRGSPMSU.gff.db', keep_order=True) + genes_in_interval = list(db.region(region=(nb_interval.chrom, nb_interval.start, nb_interval.stop), + completely_within=False, featuretype='gene')) + + # Map Nipponbare accessions to OGIs + ogi_mapping_path = f'{const.OGI_MAPPING}/Nb_to_ogi.pickle' + with open(ogi_mapping_path, 'rb') as f: + ogi_mapping = pickle.load(f) + for gene in genes_in_interval: + gene_id = sanitize_gene_id(gene.id) + ogi = ogi_mapping[gene_id] + + final_ogi_set.add(ogi) + final_ogi_dict[ogi].add(gene_id) + + return final_ogi_set, final_ogi_dict + + +def get_ogi_other_ref(ref, nb_intervals): + """ + Maps reference-specific accessions (obtained from a list of Genomic_interval tuples) to their respective OGIs + "Reference" refers to a reference other than Nipponbare + Nipponbare reference is handled by get_ogi_nb() + + Parameters: + - ref: Reference + - nb_intervals: List of Genomic_interval tuples + + Returns: + - Set containing all unique OGIs after performing OGI-to-reference mapping + - OGI-to-reference mapping dictionary + """ + + # All unique OGIs + final_ogi_set = set() + + # OGI-to-NB mapping dictionary (one OGI can map to multiple NB accessions) + final_ogi_dict = defaultdict(set) + + # Get intervals from other refs that align to (parts) of the input loci + db_align = gffutils.FeatureDB( + f'{const.ALIGNMENTS}/{"Nb_"+str(ref)}/{"Nb_"+str(ref)}.gff.db') + + # Get corresponding intervals on ref + db_annotation = gffutils.FeatureDB( + f"{const.ANNOTATIONS}/{ref}/{ref}.gff.db".format(ref)) + + for nb_interval in nb_intervals: + gff_intersections = list(db_align.region(region=(nb_interval.chrom, nb_interval.start, nb_interval.stop), + completely_within=False)) + for intersection in gff_intersections: + ref_interval = to_genomic_interval( + intersection.attributes['Name'][0]) + genes_in_interval = list(db_annotation.region(region=(ref_interval.chrom, ref_interval.start, ref_interval.stop), + completely_within=False, featuretype='gene')) + + # Map reference-specific accessions to OGIs + ogi_mapping_path = f'{const.OGI_MAPPING}/{ref}_to_ogi.pickle' + with open(ogi_mapping_path, 'rb') as f: + ogi_mapping = pickle.load(f) + for gene in genes_in_interval: + gene_id = sanitize_gene_id(gene.id) + ogi = ogi_mapping[gene_id] + + final_ogi_set.add(ogi) + final_ogi_dict[ogi].add(gene_id) + + return final_ogi_set, final_ogi_dict + +# ================================================== +# Utility function related to QTARO and Text Mining +# ================================================== + + +def get_qtaro_entry(mapping, gene): + try: + qtaro_str = '
    ' + pub_idx = 1 + for character_major in mapping[gene]: + qtaro_str += '
  • ' + character_major + '
      ' + for character_minor in mapping[gene][character_major]: + pubs = [] + for pub in mapping[gene][character_major][character_minor]: + pubs.append( + '
    • ' + get_doi_link_single_str(pub, pub_idx) + '
    • ') + pub_idx += 1 + + qtaro_str += '
    • ' + character_minor + \ + '
        ' + ''.join(pubs) + '
    • ' + qtaro_str += '

  • ' + + # Remove the line break after the last character major + return qtaro_str[:-len("
    ")] + '
' + except KeyError: + return NULL_PLACEHOLDER + + +def get_qtaro_entries(mapping, genes): + return [get_qtaro_entry(mapping, gene) for gene in genes] + + +def get_pubmed_entry(gene): + try: + with open(f'{const.TEXT_MINING_PUBMED}/{gene}.pickle', 'rb') as f: + mapping = pickle.load(f) + + pubmed_ids = [get_pubmed_link_single_str(pubmed_id[0]) for pubmed_id in sorted( + mapping.items(), key=lambda x: x[1], reverse=True)] + except FileNotFoundError: + return NULL_PLACEHOLDER + + pubmed_str = '' + for idx, pubmed in enumerate(pubmed_ids): + if idx % 2 == 0: + pubmed_str += f'{pubmed}   ' + else: + pubmed_str += f'{pubmed}\n' + + if pubmed_str[-1] == '\n': # Ends in a newline + return pubmed_str[:-len('\n')] + + return pubmed_str[:-len('   ')] + + +# ======================== +# Functions for lift-over +# ======================== + + +def get_genes_in_Nb(nb_intervals): + """ + Returns a data frame containing the genes in Nipponbare + + Parameters: + - nb_intervals: List of Genomic_interval tuples + + Returns: + - Data frame containing the genes in Nipponbare + """ + dfs = [] + + for nb_interval in nb_intervals: + # Load and search GFF_DB of Nipponbare + db = gffutils.FeatureDB( + f'{const.ANNOTATIONS}/Nb/IRGSPMSU.gff.db', keep_order=True) + genes_in_interval = list(db.region(region=(nb_interval.chrom, nb_interval.start, nb_interval.stop), + completely_within=False, featuretype='gene')) + + # Map accessions to their respective OGIs + ogi_mapping_path = f'{const.OGI_MAPPING}/Nb_to_ogi.pickle' + ogi_list = [] + with open(ogi_mapping_path, 'rb') as f: + ogi_mapping = pickle.load(f) + ogi_list = get_ogi_list([sanitize_gene_id(gene.id) + for gene in genes_in_interval], ogi_mapping) + + # Get QTARO annotations + with open(const.QTARO_DICTIONARY, 'rb') as f: + qtaro_dict = pickle.load(f) + qtaro_list = get_qtaro_entries( + qtaro_dict, [gene.id for gene in genes_in_interval]) + + pubmed_ids = [get_pubmed_entry(gene.id) for gene in genes_in_interval] + + # Construct the data frame + df = pd.DataFrame({ + 'OGI': ogi_list, + 'Name': [gene.id for gene in genes_in_interval], + 'Chromosome': [gene.chrom for gene in genes_in_interval], + 'Start': [gene.start for gene in genes_in_interval], + 'End': [gene.end for gene in genes_in_interval], + 'Strand': [gene.strand for gene in genes_in_interval], + 'QTL Analyses': qtaro_list, + 'PubMed Article IDs': pubmed_ids + }) + + dfs.append(df) + + try: + table_gene_ids = pd.concat(dfs, ignore_index=True) + # Read in dataframe containing gene descriptions + gene_description_df = pd.read_csv( + f'{const.GENE_DESCRIPTIONS}/Nb/Nb_gene_descriptions.csv') + # Right merge because some genes do not have descriptions or UniProtKB/Swiss-Prot IDs + table = pd.merge(gene_description_df, table_gene_ids, + left_on='Gene_ID', right_on='Name', how='right') + + # Reorder columns + table = table[NB_COLUMNS] + + table['UniProtKB/Swiss-Prot'] = get_uniprot_link( + table, 'UniProtKB/Swiss-Prot') + + table = table.fillna(NULL_PLACEHOLDER) + + if table.shape[0] == 0: + return create_empty_df_nb(), table['Name'].values.tolist() + + return table, table['Name'].values.tolist() + + except ValueError: # No results to concatenate + return create_empty_df_nb(), table['Name'].values.tolist() + + +def get_genes_in_other_ref(ref, nb_intervals): + """ + Returns a data frame containing the genes in references other than Nipponbare + Nipponbare is handled by get_genes_in_Nb() + + Parameters: + - ref: Reference + - nb_intervals: List of Genomic_interval tuples + + Returns: + - Data frame containing the genes in references other than Nipponbare + """ + + # Get intervals from other refs that align to (parts) of the input loci + db_align = gffutils.FeatureDB( + f'{const.ALIGNMENTS}/{"Nb_"+str(ref)}/{"Nb_"+str(ref)}.gff.db') + + # Get corresponding intervals on ref + db_annotation = gffutils.FeatureDB( + f"{const.ANNOTATIONS}/{ref}/{ref}.gff.db") + + dfs = [] + + for nb_interval in nb_intervals: + gff_intersections = list(db_align.region(region=(nb_interval.chrom, nb_interval.start, nb_interval.stop), + completely_within=False)) + for intersection in gff_intersections: + ref_interval = to_genomic_interval( + intersection.attributes['Name'][0]) + genes_in_interval = list(db_annotation.region(region=(ref_interval.chrom, ref_interval.start, ref_interval.stop), + completely_within=False, featuretype='gene')) + + # Map accessions to their respective OGIs + ogi_mapping_path = f'{const.OGI_MAPPING}/{ref}_to_ogi.pickle' + ogi_list = [] + with open(ogi_mapping_path, 'rb') as f: + ogi_mapping = pickle.load(f) + ogi_list = get_ogi_list([sanitize_gene_id(gene.id) + for gene in genes_in_interval], ogi_mapping) + + # Construct the data frame + df = pd.DataFrame({ + 'OGI': ogi_list, + 'Name': [sanitize_gene_id(gene.id) for gene in genes_in_interval], + 'Chromosome': [gene.chrom for gene in genes_in_interval], + 'Start': [gene.start for gene in genes_in_interval], + 'End': [gene.end for gene in genes_in_interval], + 'Strand': [gene.strand for gene in genes_in_interval] + }) + dfs.append(df) + + try: + table = pd.concat(dfs, ignore_index=True) + if table.shape[0] == 0: + return create_empty_df_other_refs() + + return table + + except ValueError: # No results to concatenate + return create_empty_df_other_refs() + + +def get_common_genes(refs, nb_intervals): + """ + Returns a data frame containing the genes common to the given references + + Parameters: + - ref: References + - nb_intervals: List of Genomic_interval tuples + + Returns: + - Data frame containing the genes common to the given references + """ + # No cultivars selected + if not refs: + return create_empty_no_refs_df() + + common_genes = None + for ref in refs: + if ref != 'Nipponbare': + genes_in_ref = get_genes_in_other_ref(ref, nb_intervals) + else: + genes_in_ref = get_genes_in_Nb(nb_intervals)[0] + + genes_in_ref = genes_in_ref[['OGI', 'Name']] + + try: + common_genes = pd.merge( + common_genes, genes_in_ref, on='OGI') + # First instance of merging (that is, common_genes is still None) + except TypeError: + common_genes = genes_in_ref + + common_genes = common_genes.rename( + columns={'Name_x': 'Nipponbare', 'Name_y': ref, 'Name': ref}) + + common_genes = common_genes.rename( + columns={'Name': 'Nipponbare'}).dropna().drop_duplicates() + + return common_genes + + +def get_all_genes(refs, nb_intervals): + """ + Returns a data frame containing all the genes (i.e., the set-theoretic union of all the genes) + in Nipponbare, as well as orthologous genes in the given references + + Parameters: + - ref: References (other than Nipponbare) + - nb_intervals: List of Genomic_interval tuples + + Returns: + - Data frame containing all the genes + """ + genes_in_nb = get_genes_in_Nb(nb_intervals)[0] + genes_in_nb = genes_in_nb[['OGI', 'Name']] + + common_genes = genes_in_nb + for ref in refs: + if ref != 'Nipponbare': + genes_in_other_ref = get_genes_in_other_ref(ref, nb_intervals) + genes_in_other_ref = genes_in_other_ref[['OGI', 'Name']] + common_genes = pd.merge( + common_genes, genes_in_other_ref, on='OGI', how='outer') + + common_genes = common_genes.rename( + columns={'Name_x': 'Nipponbare', 'Name_y': ref, 'Name': ref}) + + common_genes = common_genes.rename( + columns={'Name': 'Nipponbare'}).fillna(NULL_PLACEHOLDER).drop_duplicates() + + return common_genes + + +def get_unique_genes_in_other_ref(ref, nb_intervals): + """ + Returns a data frame containing the genes in a reference that are not present in Nipponbare + + Parameters: + - ref: References + - nb_intervals: List of Genomic_interval tuples + + Returns: + - Data frame containing the genes in a reference that are not present in Nipponbare + """ + genes_in_nb = get_genes_in_Nb(nb_intervals)[0] + genes_in_other_ref = get_genes_in_other_ref(ref, nb_intervals) + + genes_in_nb = genes_in_nb[['OGI']] + + # Get set difference + unique_genes = pd.concat([genes_in_other_ref, genes_in_nb, genes_in_nb]).drop_duplicates( + subset=['OGI'], keep=False) + + gene_description_df = pd.read_csv( + f'{const.GENE_DESCRIPTIONS}/{ref}/{ref}_gene_descriptions.csv') + # Right merge because some genes do not have descriptions or UniProtKB/Swiss-Prot IDs + unique_genes = pd.merge(gene_description_df, unique_genes, + left_on='Gene_ID', right_on='Name', how='right') + + unique_genes = unique_genes[FRONT_FACING_COLUMNS] + + unique_genes['UniProtKB/Swiss-Prot'] = get_uniprot_link( + unique_genes, 'UniProtKB/Swiss-Prot') + + unique_genes = unique_genes.fillna(NULL_PLACEHOLDER) + + if unique_genes.shape[0] == 0: + return create_empty_front_facing_df() + + return unique_genes diff --git a/callbacks/links_util.py b/callbacks/links_util.py index 8f4e19fc..41a4e8c8 100644 --- a/callbacks/links_util.py +++ b/callbacks/links_util.py @@ -1,58 +1,58 @@ -A_HREF = '' -LINK_ICON = '  ' - - -def get_genes_from_kegg_link(link): - idx = link.find('?') - query = link[idx:].split('+') - - return '\n'.join(query[1:]) - - -def get_kegg_link(result, id_col, genes_col): - return A_HREF + 'http://www.genome.jp/kegg-bin/show_pathway?' + \ - result[id_col] + '+' + result[genes_col].str.split('\n').str.join('+') + \ - CLOSE_A_HREF + result[id_col] + LINK_ICON - - -def get_go_link(result, id_col): - return A_HREF + 'https://amigo.geneontology.org/amigo/term/' + \ - result[id_col] + \ - CLOSE_A_HREF + result[id_col] + LINK_ICON - - -def get_to_po_link(result, id_col): - return A_HREF + 'https://www.ebi.ac.uk/ols4/ontologies/to/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252F' + \ - result[id_col].str.replace(':', '_') + \ - CLOSE_A_HREF + result[id_col] + LINK_ICON - - -def get_uniprot_link(result, id_col): - return A_HREF + 'https://www.uniprot.org/uniprotkb/' + \ - result[id_col] + '/entry' + CLOSE_A_HREF + \ - result[id_col] + LINK_ICON - - -def get_pubmed_link(result, id_col): - return A_HREF + 'https://pubmed.ncbi.nlm.nih.gov/' + \ - result[id_col] + '/entry' + CLOSE_A_HREF + \ - result[id_col] + LINK_ICON - - -def get_doi_link_single_str(doi, pub_idx): - return A_HREF + 'https://doi.org/' + doi + CLOSE_A_HREF + 'Publication ' + str(pub_idx) + LINK_ICON - - -def get_pubmed_link_single_str(pubmed): - return A_HREF + 'https://pubmed.ncbi.nlm.nih.gov/' + \ - pubmed + '/entry' + CLOSE_A_HREF + \ - pubmed + LINK_ICON - - -def get_rgi_genecard_link(result, id_col): - return A_HREF + 'https://riceome.hzau.edu.cn/genecard/' + result[id_col] + CLOSE_A_HREF + result[id_col] + LINK_ICON - - -def get_rgi_orthogroup_link(result, id_col): - return A_HREF + 'https://riceome.hzau.edu.cn/orthogroup/' + result[id_col] + CLOSE_A_HREF + result[id_col] + LINK_ICON +A_HREF = '' +LINK_ICON = '  ' + + +def get_genes_from_kegg_link(link): + idx = link.find('?') + query = link[idx:].split('+') + + return '\n'.join(query[1:]) + + +def get_kegg_link(result, id_col, genes_col): + return A_HREF + 'http://www.genome.jp/kegg-bin/show_pathway?' + \ + result[id_col] + '+' + result[genes_col].str.split('\n').str.join('+') + \ + CLOSE_A_HREF + result[id_col] + LINK_ICON + + +def get_go_link(result, id_col): + return A_HREF + 'https://amigo.geneontology.org/amigo/term/' + \ + result[id_col] + \ + CLOSE_A_HREF + result[id_col] + LINK_ICON + + +def get_to_po_link(result, id_col): + return A_HREF + 'https://www.ebi.ac.uk/ols4/ontologies/to/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252F' + \ + result[id_col].str.replace(':', '_') + \ + CLOSE_A_HREF + result[id_col] + LINK_ICON + + +def get_uniprot_link(result, id_col): + return A_HREF + 'https://www.uniprot.org/uniprotkb/' + \ + result[id_col] + '/entry' + CLOSE_A_HREF + \ + result[id_col] + LINK_ICON + + +def get_pubmed_link(result, id_col): + return A_HREF + 'https://pubmed.ncbi.nlm.nih.gov/' + \ + result[id_col] + '/entry' + CLOSE_A_HREF + \ + result[id_col] + LINK_ICON + + +def get_doi_link_single_str(doi, pub_idx): + return A_HREF + 'https://doi.org/' + doi + CLOSE_A_HREF + 'Publication ' + str(pub_idx) + LINK_ICON + + +def get_pubmed_link_single_str(pubmed): + return A_HREF + 'https://pubmed.ncbi.nlm.nih.gov/' + \ + pubmed + '/entry' + CLOSE_A_HREF + \ + pubmed + LINK_ICON + + +def get_rgi_genecard_link(result, id_col): + return A_HREF + 'https://riceome.hzau.edu.cn/genecard/' + result[id_col] + CLOSE_A_HREF + result[id_col] + LINK_ICON + + +def get_rgi_orthogroup_link(result, id_col): + return A_HREF + 'https://riceome.hzau.edu.cn/orthogroup/' + result[id_col] + CLOSE_A_HREF + result[id_col] + LINK_ICON diff --git a/callbacks/style_util.py b/callbacks/style_util.py index cc9a5957..43974b14 100644 --- a/callbacks/style_util.py +++ b/callbacks/style_util.py @@ -1,11 +1,11 @@ -def add_class_name(class_name, current_class_name): - current_classes = current_class_name.split(' ') - current_classes.append(class_name) - - return ' '.join(current_classes) - - -def remove_class_name(class_name, current_class_name): - current_classes = current_class_name.split(' ') - - return ' '.join([current_class for current_class in current_classes if current_class != class_name]) +def add_class_name(class_name, current_class_name): + current_classes = current_class_name.split(' ') + current_classes.append(class_name) + + return ' '.join(current_classes) + + +def remove_class_name(class_name, current_class_name): + current_classes = current_class_name.split(' ') + + return ' '.join([current_class for current_class in current_classes if current_class != class_name]) diff --git a/callbacks/text_mining/callbacks.py b/callbacks/text_mining/callbacks.py index 8943bc86..bb522c65 100644 --- a/callbacks/text_mining/callbacks.py +++ b/callbacks/text_mining/callbacks.py @@ -1,135 +1,151 @@ -from dash import Input, Output, State, ctx, ALL, html -from dash.exceptions import PreventUpdate -from collections import namedtuple - -from .util import * -from ..lift_over import util as lift_over_util - -def init_callback(app): - - # to display user input interval in the top nav - @app.callback( - Output('text-mining-genomic-intervals-input', 'children'), - State('homepage-genomic-intervals-submitted-input', 'data'), - Input('homepage-is-submitted', 'data'), - Input('text-mining-submit', 'n_clicks') - ) - def display_input(nb_intervals_str, homepage_is_submitted, *_): - if homepage_is_submitted: - if nb_intervals_str and not lift_over_util.is_error(lift_over_util.get_genomic_intervals_from_input(nb_intervals_str)): - return [html.B('Your Input Intervals: '), html.Span(nb_intervals_str)] - else: - return None - - raise PreventUpdate - - @app.callback( - Output('text-mining-query-saved-input', 'data', allow_duplicate=True), - Input({'type': 'example-text-mining', - 'description': ALL}, 'n_clicks'), - prevent_initial_call=True - ) - def set_input_fields_with_preset_input(example_text_mining_n_clicks): - if ctx.triggered_id and not all(val == 0 for val in example_text_mining_n_clicks): - return ctx.triggered_id['description'] - - raise PreventUpdate - - - @app.callback( - Output('text-mining-query-saved-input', 'data', allow_duplicate=True), - Input('text-mining-query', 'value'), - prevent_initial_call=True - ) - def set_input_fields(query_string): - return query_string - - - @app.callback( - Output('text-mining-query', 'value'), - Input('text-mining-query-saved-input', 'data'), - ) - def get_input_homepage_session_state(query): - return query - - @app.callback( - Output('text-mining-input-error', 'style'), - Output('text-mining-input-error', 'children'), - - Output('text-mining-is-submitted', 'data', allow_duplicate=True), - Output('text-mining-query-submitted-input', - 'data', allow_duplicate=True), - Input('text-mining-submit', 'n_clicks'), - Input('text-mining-query', 'n_submit'), - State('homepage-is-submitted', 'data'), - State('text-mining-query', 'value'), - prevent_initial_call=True - ) - def submit_text_mining_input(text_mining_submitted_n_clicks, text_mining_query_n_submit, homepage_is_submitted, text_mining_query): - if homepage_is_submitted and (text_mining_submitted_n_clicks >= 1 or text_mining_query_n_submit >= 1): - is_there_error, message = is_error(text_mining_query) - - if not is_there_error: - return {'display': 'none'}, message, True, text_mining_query - else: - return {'display': 'block'}, message, False, None - - raise PreventUpdate - - - @app.callback( - Output('text-mining-results-container', 'style'), - Input('text-mining-is-submitted', 'data') - ) - def display_coexpression_output(text_mining_is_submitted): - if text_mining_is_submitted: - return {'display': 'block'} - - else: - return {'display': 'none'} - - - @app.callback( - Output('text-mining-result-table', 'data'), - Output('text-mining-result-table', 'columns'), - Output('text-mining-results-stats', 'children'), - - Input('text-mining-is-submitted', 'data'), - State('homepage-is-submitted', 'data'), - State('text-mining-query-submitted-input', 'data') - ) - def display_text_mining_results(text_mining_is_submitted, homepage_submitted, text_mining_query_submitted_input): - if homepage_submitted and text_mining_is_submitted: - query_string = text_mining_query_submitted_input - - is_there_error, _ = is_error(query_string) - if not is_there_error: - text_mining_results_df = text_mining_query_search(query_string) - - columns = [{'id': x, 'name': x, 'presentation': 'markdown'} - for x in text_mining_results_df.columns] - - num_entries = get_num_entries(text_mining_results_df, "PMID") - num_unique_entries = get_num_unique_entries( - text_mining_results_df, "PMID") - - if num_entries == 1: - stats = f'Found {num_entries} match ' - else: - stats = f'Found {num_entries} matches ' - - if num_unique_entries == 1: - stats += f'across {num_unique_entries} publication' - else: - stats += f'across {num_unique_entries} publications' - - return text_mining_results_df.to_dict('records'), columns, stats - - raise PreventUpdate - - @app.callback( - Output('text-mining-result-table', 'filter_query'), - Input('text-mining-reset-table', 'n_clicks') - ) - def reset_table_filters(*_): - return '' +from dash import Input, Output, State, ctx, ALL, html, no_update +from dash.exceptions import PreventUpdate +from collections import namedtuple + +from .util import * +from ..lift_over import util as lift_over_util + +def init_callback(app): + + # to display user input interval in the top nav + @app.callback( + Output('text-mining-genomic-intervals-input', 'children'), + State('homepage-genomic-intervals-submitted-input', 'data'), + Input('homepage-is-submitted', 'data'), + Input('text-mining-submit', 'n_clicks') + ) + def display_input(nb_intervals_str, homepage_is_submitted, *_): + if homepage_is_submitted: + if nb_intervals_str and not lift_over_util.is_error(lift_over_util.get_genomic_intervals_from_input(nb_intervals_str)): + return [html.B('Your Input Intervals: '), html.Span(nb_intervals_str)] + else: + return None + + raise PreventUpdate + + @app.callback( + Output('text-mining-query-saved-input', 'data', allow_duplicate=True), + Input({'type': 'example-text-mining', + 'description': ALL}, 'n_clicks'), + prevent_initial_call=True + ) + def set_input_fields_with_preset_input(example_text_mining_n_clicks): + if ctx.triggered_id and not all(val == 0 for val in example_text_mining_n_clicks): + return ctx.triggered_id['description'] + + raise PreventUpdate + + + @app.callback( + Output('text-mining-query-saved-input', 'data', allow_duplicate=True), + Input('text-mining-query', 'value'), + prevent_initial_call=True + ) + def set_input_fields(query_string): + return query_string + + + @app.callback( + Output('text-mining-query', 'value'), + Input('text-mining-query-saved-input', 'data'), + ) + def get_input_homepage_session_state(query): + return query + + @app.callback( + Output('text-mining-input-error', 'style'), + Output('text-mining-input-error', 'children'), + + Output('text-mining-is-submitted', 'data', allow_duplicate=True), + Output('text-mining-query-submitted-input', + 'data', allow_duplicate=True), + Input('text-mining-submit', 'n_clicks'), + Input('text-mining-query', 'n_submit'), + State('homepage-is-submitted', 'data'), + State('text-mining-query', 'value'), + prevent_initial_call=True + ) + def submit_text_mining_input(text_mining_submitted_n_clicks, text_mining_query_n_submit, homepage_is_submitted, text_mining_query): + if homepage_is_submitted and (text_mining_submitted_n_clicks >= 1 or text_mining_query_n_submit >= 1): + is_there_error, message = is_error(text_mining_query) + + if not is_there_error: + return {'display': 'none'}, message, True, text_mining_query + else: + return {'display': 'block'}, message, no_update, no_update + + raise PreventUpdate + + + @app.callback( + Output('text-mining-results-container', 'style'), + Input('text-mining-is-submitted', 'data') + ) + def display_coexpression_output(text_mining_is_submitted): + if text_mining_is_submitted: + return {'display': 'block'} + + else: + return {'display': 'none'} + + @app.callback( + Output('text-mining-submit', 'disabled'), + Input('text-mining-submit', 'n_clicks'), + Input('text-mining-result-table', 'data'), + ) + def trigger(n_clicks, data): + context = ctx.triggered_id + + if context == 'text-mining-submit': + if n_clicks > 0 : + return True + else: + return False + else: + return False + + + @app.callback( + Output('text-mining-result-table', 'data'), + Output('text-mining-result-table', 'columns'), + Output('text-mining-results-stats', 'children'), + + State('text-mining-is-submitted', 'data'), + State('homepage-is-submitted', 'data'), + Input('text-mining-query-submitted-input', 'data') + ) + def display_text_mining_results(text_mining_is_submitted, homepage_submitted, text_mining_query_submitted_input): + if homepage_submitted and text_mining_is_submitted: + query_string = text_mining_query_submitted_input + + is_there_error, _ = is_error(query_string) + if not is_there_error: + text_mining_results_df = text_mining_query_search(query_string) + + columns = [{'id': x, 'name': x, 'presentation': 'markdown'} + for x in text_mining_results_df.columns] + + num_entries = get_num_entries(text_mining_results_df, "PMID") + num_unique_entries = get_num_unique_entries( + text_mining_results_df, "PMID") + + if num_entries == 1: + stats = f'Found {num_entries} match ' + else: + stats = f'Found {num_entries} matches ' + + if num_unique_entries == 1: + stats += f'across {num_unique_entries} publication' + else: + stats += f'across {num_unique_entries} publications' + + return text_mining_results_df.to_dict('records'), columns, stats + + raise PreventUpdate + + @app.callback( + Output('text-mining-result-table', 'filter_query'), + Input('text-mining-reset-table', 'n_clicks') + ) + def reset_table_filters(*_): + return '' diff --git a/callbacks/text_mining/util.py b/callbacks/text_mining/util.py index cbfdb487..2bf3f6fc 100644 --- a/callbacks/text_mining/util.py +++ b/callbacks/text_mining/util.py @@ -1,94 +1,94 @@ -import pandas as pd -from ..constants import Constants -from ..general_util import * -from ..links_util import * -import regex as re -import ftfy -from ..file_util import * - -const = Constants() -COLNAMES = ['Gene', 'PMID', 'Title', 'Sentence', 'Score'] - - -def sanitize_text(text): - # Sanitization of HTML tags should come first - text = re.sub(r'<\s+', '<', text) - text = re.sub(r'\s+>', '>', text) - text = re.sub(r'\s+', '>', text) - - text = re.sub(r'\s+', '>', text) + text = re.sub(r'\s+', '>', text) + + text = re.sub(r'\s+