diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..b7a704b --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,39 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/beast2xml/__init__.py b/beast2xml/__init__.py index 5af8f93..8c4f0a1 100644 --- a/beast2xml/__init__.py +++ b/beast2xml/__init__.py @@ -1,7 +1,4 @@ from beast2xml.beast2 import BEAST2XML -# Note that the version string must have the following format, otherwise it -# will not be found by the version() function in ../setup.py -__version__ = "1.1.0" __all__ = ["BEAST2XML"] diff --git a/beast2xml/beast2.py b/beast2xml/beast2.py index b2f2a0a..978b745 100644 --- a/beast2xml/beast2.py +++ b/beast2xml/beast2.py @@ -6,6 +6,9 @@ from beast2xml.date_utilities import date_to_decimal import xml.etree.ElementTree as ET import xml +import ete3 + +import warnings from importlib.resources import files @@ -110,16 +113,17 @@ def __init__( 
self._age_by_full_id = {} self._age_by_short_id = {} self._date_unit = date_unit + self._initial_phylo_tree = None @staticmethod def find_elements(tree): """ - Check that an XML tree has the required structure and return the found + Check that an XML tree has the required structure and return the found elements. Parameters ---------- - tree : xml.etree.ElementTree.Element + tree : xml.etree.ElementTree.Element Returns @@ -321,7 +325,7 @@ def _to_xml_tree( Specifying how often to write to the trace log file. If None, the value in the template will be retained. tree_log_every : int, default=None - Specifying how often to write to the tree log file. If None, the value in the + Specifying how often to write to the tree log file. If None, the value in the template will be retained. screen_log_every : int, default=None Specifying how often to write to the terminal (screen) log. If None, the @@ -338,7 +342,7 @@ def _to_xml_tree( Returns ------- - tree: xml.etree.ElementTree + tree: xml.etree.ElementTree ElementTree for running on BEAST """ if mimic_beauti: @@ -352,18 +356,71 @@ def _to_xml_tree( data = elements["data"] data_id = data.get("id") tree_logger_key = "./run/logger[@id='treelog.t:" + data_id + "']" + trait = elements["./run/state/tree/trait"] + # Delete any existing children of the data node. delete_child_nodes(data) - trait = elements["./run/state/tree/trait"] - if not isinstance(default_age, (float, int)): raise TypeError("The default age must be an integer or float.") + sequences = self._sequences age_by_short_id = deepcopy(self._age_by_short_id) + if self._initial_phylo_tree is not None: + tip_set_diffs = self.set_diffs_initial_tree_and_sequences() + if tip_set_diffs["in initial tree"]: + raise ValueError( + "Initial tree has additional sequences to the ones you have added."
+ ) + if tip_set_diffs["in sequences"]: + warnings.warn( + "\n".join( + [ + "One or more of the sequences you have added are not represented in the initial tree you gave.", + "These sequences will not be added to the xml being generated.", + "Use method set_diffs_initial_tree_and_sequences to view these.", + ] + ) + ) + sequences = Reads( + [ + sequence + for sequence in self._sequences + if sequence.id not in tip_set_diffs["in sequences"] + ] + ) + age_by_short_id = { + key: age + for key, age in self._age_by_short_id.items() + if key not in tip_set_diffs["in sequences"] + } + + initial_tree_nodes = self._tree.findall("./run/init") + if len(initial_tree_nodes) == 0: + raise ValueError("Template has no initial tree.") + if len(initial_tree_nodes) > 1: + raise ValueError( + "More than one initial tree is in the template xml. BEAST2-xml only supports template xmls with one initial tree." + ) + elements["run"].remove(initial_tree_nodes[0]) + newick_tree = self._initial_phylo_tree.write( + format=self._initial_phylo_tree_format + ) + replacement = ET.SubElement( + elements["run"], + "init", + spec="beast.util.TreeParser", + id="NewickTree.t:" + data_id, + initial="@Tree.t:" + data_id, + taxa="@" + data_id, + IsLabelledNewick="true", + adjustTipHeights="false", + newick=newick_tree, + ) + # Add in all sequences. for sequence in sorted( - self._sequences + sequences ): # Sorting adds the sequences alphabetically like in BEAUti. seq_id = sequence.id short_id = seq_id.split()[0] @@ -381,7 +438,7 @@ def _to_xml_tree( ) trait_order = [ - sequence.id.split()[0] for sequence in self._sequences + sequence.id.split()[0] for sequence in sequences ] # ensures order is the same as BEAUti's. trait_text = [ short_id + "=" + str(age_by_short_id[short_id]) for short_id in trait_order @@ -478,7 +535,7 @@ def to_string( Specifying how often to write to the trace log file. If None, the value in the template will be retained.
tree_log_every: int, default=None - Specifying how often to write to the tree log file. If None, the value in the + Specifying how often to write to the tree log file. If None, the value in the template will be retained. screen_log_every: int, default=None Specifying how often to write to the terminal (screen) log. If None, the @@ -495,7 +552,7 @@ def to_string( Returns ------- - tree: str + tree: str String representation of xml.etree.ElementTree for running on BEAST """ tree = self._to_xml_tree( @@ -555,7 +612,7 @@ def to_xml( Specifying how often to write to the trace log file. If None, the value in the template will be retained. tree_log_every: int, default=None - Specifying how often to write to the tree log file. If None, the value in the + Specifying how often to write to the tree log file. If None, the value in the template will be retained. screen_log_every: int, default=None Specifying how often to write to the terminal (screen) log. If None, the @@ -859,3 +916,40 @@ def add_rate_change_times(self, parameter, times): self.change_parameter_state_node( self._rate_change_to_param_dict[parameter], dimension=dimensions ) + + def add_initial_tree(self, file_path, format=1): + """ + Add initial newick tree. + + Parameters + ---------- + file_path: str + Path to the newick tree file.
+ format: int, default 1 + Format of the newick tree file: + 0 flexible with support values + 1 flexible with internal node names + 2 all branches + leaf names + internal supports + 3 all branches + all names + 4 leaf branches + leaf names + 5 internal and leaf branches + leaf names + 6 internal branches + leaf names + 7 leaf branches + all names + 8 all names + 9 leaf names + 100 topology only + + Returns + ------- + None + """ + self._initial_phylo_tree = ete3.Tree(file_path, format=format) + self._initial_phylo_tree_format = format + + def set_diffs_initial_tree_and_sequences(self): + tree_tips = set(self._initial_phylo_tree.get_leaf_names()) + sequence_tips = set([sequence.id for sequence in self._sequences]) + return { + "in initial tree": tree_tips - sequence_tips, + "in sequences": sequence_tips - tree_tips, + } diff --git a/setup.py b/setup.py index 517aace..2728a5a 100644 --- a/setup.py +++ b/setup.py @@ -3,30 +3,14 @@ from setuptools import setup -# Modified from http://stackoverflow.com/questions/2058802/ -# how-can-i-get-the-version-defined-in-setup-py-setuptools-in-my-package -def version(): - import os - import re - - init = os.path.join("beast2xml", "__init__.py") - with open(init) as fp: - initData = fp.read() - match = re.search(r"^__version__ = ['\"]([^'\"]+)['\"]", initData, re.M) - if match: - return match.group(1) - else: - raise RuntimeError("Unable to find version string in %r." 
% init) - - setup( name="beast2-xml", - version=version(), + version="1.3.0", packages=["beast2xml"], package_data={"beast2xml": ["templates/*.xml"]}, url="https://github.com/acorg/beast2-xml", download_url="https://github.com/acorg/beast2-xml", - author="Terry Jones", + author="Terry Jones", author_email="tcj25@cam.ac.uk", keywords=["BEAST2", "XML"], classifiers=[ @@ -44,8 +28,11 @@ def version(): long_description=("Please see https://github.com/acorg/beast2-xml for details."), license="MIT", scripts=["bin/beast2-xml.py", "bin/beast2-xml-version.py"], - install_requires=["dark-matter>=1.1.28", - 'pandas>=2.2.2', - 'python=3.10', - 'six'], + install_requires=[ + "dark-matter>=1.1.28", + "pandas>=2.2.2", + "python=3.10", + "ete3", + "six", + ], )