""" Module that allows plotting of string "category" data. i.e. ``plot(['d', 'f', 'a'],[1, 2, 3])`` will plot three points with x-axis values of 'd', 'f', 'a'. See :doc:`/gallery/lines_bars_and_markers/categorical_variables` for an example. The module uses Matplotlib's `matplotlib.units` mechanism to convert from strings to integers, provides a tick locator and formatter, and the class:`.UnitData` that creates and stores the string-to-integer mapping. """ from collections import OrderedDict import dateutil.parser import itertools import logging import numpy as np import matplotlib.cbook as cbook import matplotlib.units as units import matplotlib.ticker as ticker _log = logging.getLogger(__name__) class StrCategoryConverter(units.ConversionInterface): @staticmethod def convert(value, unit, axis): """Convert strings in value to floats using mapping information store in the unit object. Parameters ---------- value : string or iterable Value or list of values to be converted. unit : `.UnitData` An object mapping strings to integers. axis : `~matplotlib.axis.Axis` axis on which the converted value is plotted. .. note:: *axis* is unused. Returns ------- mapped_value : float or ndarray[float] """ if unit is None: raise ValueError( 'Missing category information for StrCategoryConverter; ' 'this might be caused by unintendedly mixing categorical and ' 'numeric data') # dtype = object preserves numerical pass throughs values = np.atleast_1d(np.array(value, dtype=object)) # pass through sequence of non binary numbers if all((units.ConversionInterface.is_numlike(v) and not isinstance(v, (str, bytes))) for v in values): return np.asarray(values, dtype=float) # force an update so it also does type checking unit.update(values) str2idx = np.vectorize(unit._mapping.__getitem__, otypes=[float]) mapped_value = str2idx(values) return mapped_value @staticmethod def axisinfo(unit, axis): """Sets the default axis ticks and labels Parameters ---------- unit : `.UnitData` object string unit information for value axis : `~matplotlib.Axis.axis` axis for which information is being set Returns ------- axisinfo : `~matplotlib.units.AxisInfo` Information to support default tick labeling .. note: axis is not used """ # locator and formatter take mapping dict because # args need to be pass by reference for updates majloc = StrCategoryLocator(unit._mapping) majfmt = StrCategoryFormatter(unit._mapping) return units.AxisInfo(majloc=majloc, majfmt=majfmt) @staticmethod def default_units(data, axis): """Sets and updates the :class:`~matplotlib.Axis.axis` units. Parameters ---------- data : string or iterable of strings axis : `~matplotlib.Axis.axis` axis on which the data is plotted Returns ------- class : `.UnitData` object storing string to integer mapping """ # the conversion call stack is supposed to be # default_units->axis_info->convert if axis.units is None: axis.set_units(UnitData(data)) else: axis.units.update(data) return axis.units class StrCategoryLocator(ticker.Locator): """tick at every integer mapping of the string data""" def __init__(self, units_mapping): """ Parameters ----------- units_mapping : Dict[str, int] string:integer mapping """ self._units = units_mapping def __call__(self): return list(self._units.values()) def tick_values(self, vmin, vmax): return self() class StrCategoryFormatter(ticker.Formatter): """String representation of the data at every tick""" def __init__(self, units_mapping): """ Parameters ---------- units_mapping : Dict[Str, int] string:integer mapping """ self._units = units_mapping def __call__(self, x, pos=None): if pos is None: return "" r_mapping = {v: StrCategoryFormatter._text(k) for k, v in self._units.items()} return r_mapping.get(int(np.round(x)), '') @staticmethod def _text(value): """Converts text values into utf-8 or ascii strings. """ if isinstance(value, bytes): value = value.decode(encoding='utf-8') elif not isinstance(value, str): value = str(value) return value class UnitData(object): def __init__(self, data=None): """ Create mapping between unique categorical values and integer ids. Parameters ---------- data : iterable sequence of string values """ self._mapping = OrderedDict() self._counter = itertools.count() if data is not None: self.update(data) @staticmethod def _str_is_convertible(val): """ Helper method to see if a string can be cast to float or parsed as date. """ try: float(val) except ValueError: try: dateutil.parser.parse(val) except ValueError: return False return True def update(self, data): """Maps new values to integer identifiers. Parameters ---------- data : iterable sequence of string values Raises ------ TypeError If the value in data is not a string, unicode, bytes type """ data = np.atleast_1d(np.array(data, dtype=object)) # check if convertible to number: convertible = True for val in OrderedDict.fromkeys(data): # OrderedDict just iterates over unique values in data. if not isinstance(val, (str, bytes)): raise TypeError("{val!r} is not a string".format(val=val)) if convertible: # this will only be called so long as convertible is True. convertible = self._str_is_convertible(val) if val not in self._mapping: self._mapping[val] = next(self._counter) if convertible: _log.info('Using categorical units to plot a list of strings ' 'that are all parsable as floats or dates. If these ' 'strings should be plotted as numbers, cast to the ' 'appropriate data type before plotting.') # Register the converter with Matplotlib's unit framework units.registry[str] = StrCategoryConverter() units.registry[np.str_] = StrCategoryConverter() units.registry[bytes] = StrCategoryConverter() units.registry[np.bytes_] = StrCategoryConverter()