# Source code for dendropy.dataio.phylipwriter
#! /usr/bin/env python
# -*- coding: utf-8 -*-
##############################################################################
## DendroPy Phylogenetic Computing Library.
##
## Copyright 2010-2015 Jeet Sukumaran and Mark T. Holder.
## All rights reserved.
##
## See "LICENSE.rst" for terms and conditions of usage.
##
## If you use this work or any portion thereof in published work,
## please cite it as:
##
## Sukumaran, J. and M. T. Holder. 2010. DendroPy: a Python library
## for phylogenetic computing. Bioinformatics 26: 1569-1571.
##
##############################################################################
"""
Implementation of PHYLIP-format data writer.
"""
from dendropy.dataio import ioservice
from dendropy.utility import textprocessing
# Fixed width (in characters) of the taxon label field in 'strict' PHYLIP
# output: the writer truncates longer labels to this length and space-pads
# shorter ones up to it.
STRICT_MODE_MAX_LABEL_LENGTH = 10
class PhylipWriter(ioservice.DataWriter):
    """
    Implements the DataWriter interface for writing PHYLIP files.
    """

    def __init__(self, **kwargs):
        """
        Keyword Arguments
        -----------------
        strict : bool
            If |True|, use 'strict' format, i.e., taxon labels given in
            first 10 characters, followed by sequence starting at character 11.
            Default is |False|: use 'relaxed' format, with arbitrary-length
            taxon labels separated from sequences by two or more spaces.
        spaces_to_underscores : bool
            If |True|, all spaces will be converted to underscores. Default is
            |False|: spaces will be preserved.
        taxon_label_fn : function object
            If specified, then this function will be called every time a
            taxon label is required. It will be passed a |Taxon| object as an
            argument and should return the string or string-like object that
            should serve as the label. Default: the ``label`` attribute of
            the taxon is used.
        force_unique_taxon_labels : bool
            If |True|, then taxon labels will be modified to avoid duplicate
            labels. Default is |False|: taxon labels will not be modified.
        suppress_missing_taxa : bool
            If |True|, then taxa with zero characters will not be printed.
            Default is |False|: all taxa will be printed.
        ignore_unrecognized_keyword_arguments : boolean, default: |False|
            If |True|, then unsupported or unrecognized keyword arguments will
            not result in an error. Default is |False|: unsupported keyword
            arguments will result in an error.
        """
        ioservice.DataWriter.__init__(self, **kwargs)
        self.strict = kwargs.pop("strict", False)
        self.spaces_to_underscores = kwargs.pop("spaces_to_underscores", False)
        self.force_unique_taxon_labels = kwargs.pop("force_unique_taxon_labels", False)
        self.suppress_missing_taxa = kwargs.pop("suppress_missing_taxa", False)
        self.taxon_label_fn = kwargs.pop("taxon_label_fn", None)
        if self.taxon_label_fn is None:
            self.taxon_label_fn = lambda taxon: taxon.label
        # Everything this writer understands has been popped by now; whatever
        # is left in ``kwargs`` is handed to the inherited checker.
        self.check_for_unused_keyword_arguments(kwargs)

    def _write(self,
            stream,
            taxon_namespaces=None,
            tree_lists=None,
            char_matrices=None,
            global_annotations_target=None):
        """
        Writes each character matrix in ``char_matrices`` to ``stream``.

        If ``self.attached_taxon_namespace`` is not |None|, matrices whose
        taxon namespace is a different object are skipped. The
        ``taxon_namespaces``, ``tree_lists`` and ``global_annotations_target``
        arguments are accepted but not used by this writer.
        """
        # ``char_matrices`` defaults to None; treat that as "nothing to
        # write" instead of failing on iteration.
        for char_matrix in (char_matrices or ()):
            if (self.attached_taxon_namespace is not None
                    and char_matrix.taxon_namespace is not self.attached_taxon_namespace):
                continue
            self._write_char_matrix(stream, char_matrix)

    def _compose_label(self, taxon):
        "Returns the label for ``taxon``, with spaces replaced if requested."
        label = self.taxon_label_fn(taxon)
        if self.spaces_to_underscores:
            label = label.replace(' ', '_')
        return label

    def _write_char_matrix(self, stream, char_matrix):
        """
        Writes ``char_matrix`` to ``stream`` as a full PHYLIP document.

        A header line of the form "<number of sequences> <number of sites>"
        is written first, followed by one line per taxon in the matrix's
        taxon namespace: the label padded to a common width, a separator,
        and the sequence symbols. Taxa without a sequence in the matrix get
        an empty sequence; lines with an empty sequence are omitted when
        ``self.suppress_missing_taxa`` is set.
        """
        if self.strict or self.force_unique_taxon_labels:
            taxon_label_map = self.get_taxon_label_map(char_matrix.taxon_namespace)
        else:
            taxon_label_map = {}
            for taxon in char_matrix.taxon_namespace:
                taxon_label_map[taxon] = self._compose_label(taxon)
        # Strict format: the sequence follows the fixed-width label field
        # directly. Relaxed format: labels may contain single spaces, so two
        # spaces are needed to mark the end of the label unambiguously.
        if self.strict:
            spacer = ""
        else:
            spacer = "  "
        # ``or [0]`` keeps max() from raising on an empty taxon namespace.
        maxlen = max([len(str(label)) for label in taxon_label_map.values()] or [0])
        n_seqs = len(char_matrix)
        n_sites = char_matrix.max_sequence_size
        stream.write("%d %d\n" % (n_seqs, n_sites))
        for taxon in char_matrix.taxon_namespace:
            if taxon in char_matrix:
                seq_vec = str(char_matrix[taxon].symbols_as_string())
            else:
                seq_vec = ""
            if seq_vec or not self.suppress_missing_taxa:
                # Labels may be string-like objects (see ``taxon_label_fn``),
                # so coerce to ``str`` before padding, matching how
                # ``maxlen`` was measured.
                padded_label = str(taxon_label_map[taxon]).ljust(maxlen)
                stream.write("%s%s%s\n" % (padded_label, spacer, seq_vec))

    def get_taxon_label_map(self, taxon_namespace):
        """
        Returns a dictionary mapping each taxon in ``taxon_namespace`` to the
        label to be written for it.

        Labels are produced by ``self.taxon_label_fn``, optionally with
        spaces converted to underscores, and then passed through
        ``textprocessing.unique_taxon_label_map``. In strict mode labels are
        first truncated to, and finally space-padded to,
        ``STRICT_MODE_MAX_LABEL_LENGTH`` characters.
        """
        if self.strict:
            max_label_len = STRICT_MODE_MAX_LABEL_LENGTH
        else:
            max_label_len = 0
        taxon_label_map = {}
        for taxon in taxon_namespace:
            label = self._compose_label(taxon)
            if self.strict:
                label = label[:max_label_len]
            taxon_label_map[taxon] = label
        # NOTE(review): presumably this de-duplicates the labels while
        # honoring ``max_label_len`` (with 0 meaning "no limit") -- confirm
        # against ``textprocessing.unique_taxon_label_map``.
        taxon_label_map = textprocessing.unique_taxon_label_map(
                taxon_namespace, taxon_label_map, max_label_len)
        if self.strict:
            # Pad short labels so that every label fills the fixed-width
            # field; ``ljust`` leaves labels already at full width untouched.
            for taxon in taxon_label_map:
                taxon_label_map[taxon] = taxon_label_map[taxon].ljust(
                        STRICT_MODE_MAX_LABEL_LENGTH)
        return taxon_label_map