""" define the IntervalIndex """ import numpy as np from pandas.core.dtypes.missing import notna, isna from pandas.core.dtypes.generic import ABCPeriodIndex from pandas.core.dtypes.dtypes import IntervalDtype from pandas.core.dtypes.common import ( _ensure_platform_int, is_list_like, is_datetime_or_timedelta_dtype, is_integer_dtype, is_object_dtype, is_categorical_dtype, is_float_dtype, is_interval_dtype, is_scalar, is_float, is_number, is_integer) from pandas.core.indexes.base import ( Index, _ensure_index, default_pprint, _index_shared_docs) from pandas._libs import Timestamp, Timedelta from pandas._libs.interval import ( Interval, IntervalMixin, IntervalTree, intervals_to_interval_bounds) from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.indexes.multi import MultiIndex from pandas.compat.numpy import function as nv from pandas.core import common as com from pandas.util._decorators import cache_readonly, Appender from pandas.core.config import get_option from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import DateOffset import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( dict(klass='IntervalIndex', target_klass='IntervalIndex or list of Intervals')) _VALID_CLOSED = set(['left', 'right', 'both', 'neither']) def _get_next_label(label): dtype = getattr(label, 'dtype', type(label)) if isinstance(label, (Timestamp, Timedelta)): dtype = 'datetime64' if is_datetime_or_timedelta_dtype(dtype): return label + np.timedelta64(1, 'ns') elif is_integer_dtype(dtype): return label + 1 elif is_float_dtype(dtype): return np.nextafter(label, np.infty) else: raise TypeError('cannot determine next label for type %r' % type(label)) def _get_prev_label(label): dtype = getattr(label, 'dtype', type(label)) if isinstance(label, (Timestamp, Timedelta)): dtype = 'datetime64' if is_datetime_or_timedelta_dtype(dtype): return label - np.timedelta64(1, 'ns') elif is_integer_dtype(dtype): return label - 1 elif is_float_dtype(dtype): return np.nextafter(label, -np.infty) else: raise TypeError('cannot determine next label for type %r' % type(label)) def _get_interval_closed_bounds(interval): """ Given an Interval or IntervalIndex, return the corresponding interval with closed bounds. """ left, right = interval.left, interval.right if interval.open_left: left = _get_next_label(left) if interval.open_right: right = _get_prev_label(right) return left, right def _new_IntervalIndex(cls, d): """ This is called upon unpickling, rather than the default which doesn't have arguments and breaks __new__ """ return cls.from_arrays(**d) class IntervalIndex(IntervalMixin, Index): """ Immutable Index implementing an ordered, sliceable set. IntervalIndex represents an Index of intervals that are all closed on the same side. .. versionadded:: 0.20.0 .. warning:: The indexing behaviors are provisional and may change in a future version of pandas. Attributes ---------- left, right : array-like (1-dimensional) Left and right bounds for each interval. closed : {'left', 'right', 'both', 'neither'}, optional Whether the intervals are closed on the left-side, right-side, both or neither. Defaults to 'right'. name : object, optional Name to be stored in the index. copy : boolean, default False Copy the meta-data Examples --------- A new ``IntervalIndex`` is typically constructed using :func:`interval_range`: >>> pd.interval_range(start=0, end=5) IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]] closed='right', dtype='interval[int64]') It may also be constructed using one of the constructor methods :meth:`IntervalIndex.from_arrays`, :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_intervals` and :meth:`IntervalIndex.from_tuples`. See further examples in the doc strings of ``interval_range`` and the mentioned constructor methods. Notes ------ See the `user guide `_ for more. See Also -------- Index : The base pandas Index type Interval : A bounded slice-like interval interval_range : Function to create a fixed frequency IntervalIndex, IntervalIndex.from_arrays, IntervalIndex.from_breaks, IntervalIndex.from_intervals, IntervalIndex.from_tuples cut, qcut : convert arrays of continuous data into categoricals/series of ``Interval``. """ _typ = 'intervalindex' _comparables = ['name'] _attributes = ['name', 'closed'] _allow_index_ops = True # we would like our indexing holder to defer to us _defer_to_indexing = True _mask = None def __new__(cls, data, closed='right', name=None, copy=False, dtype=None, fastpath=False, verify_integrity=True): if fastpath: return cls._simple_new(data.left, data.right, closed, name, copy=copy, verify_integrity=False) if name is None and hasattr(data, 'name'): name = data.name if isinstance(data, IntervalIndex): left = data.left right = data.right closed = data.closed else: # don't allow scalars if is_scalar(data): cls._scalar_data_error(data) data = IntervalIndex.from_intervals(data, name=name) left, right, closed = data.left, data.right, data.closed return cls._simple_new(left, right, closed, name, copy=copy, verify_integrity=verify_integrity) @classmethod def _simple_new(cls, left, right, closed=None, name=None, copy=False, verify_integrity=True): result = IntervalMixin.__new__(cls) if closed is None: closed = 'right' left = _ensure_index(left, copy=copy) right = _ensure_index(right, copy=copy) # coerce dtypes to match if needed if is_float_dtype(left) and is_integer_dtype(right): right = right.astype(left.dtype) if is_float_dtype(right) and is_integer_dtype(left): left = left.astype(right.dtype) if type(left) != type(right): raise ValueError("must not have differing left [{}] " "and right [{}] types".format( type(left), type(right))) if isinstance(left, ABCPeriodIndex): raise ValueError("Period dtypes are not supported, " "use a PeriodIndex instead") result._left = left result._right = right result._closed = closed result.name = name if verify_integrity: result._validate() result._reset_identity() return result @Appender(_index_shared_docs['_shallow_copy']) def _shallow_copy(self, left=None, right=None, **kwargs): if left is None: # no values passed left, right = self.left, self.right elif right is None: # only single value passed, could be an IntervalIndex # or array of Intervals if not isinstance(left, IntervalIndex): left = type(self).from_intervals(left) left, right = left.left, left.right else: # both left and right are values pass attributes = self._get_attributes_dict() attributes.update(kwargs) attributes['verify_integrity'] = False return self._simple_new(left, right, **attributes) def _validate(self): """ Verify that the IntervalIndex is valid. """ if self.closed not in _VALID_CLOSED: raise ValueError("invalid options for 'closed': %s" % self.closed) if len(self.left) != len(self.right): raise ValueError('left and right must have the same length') left_mask = notna(self.left) right_mask = notna(self.right) if not (left_mask == right_mask).all(): raise ValueError('missing values must be missing in the same ' 'location both left and right sides') if not (self.left[left_mask] <= self.right[left_mask]).all(): raise ValueError('left side of interval must be <= right side') self._mask = ~left_mask @cache_readonly def hasnans(self): """ return if I have any nans; enables various perf speedups """ return self._isnan.any() @cache_readonly def _isnan(self): """ return if each value is nan""" if self._mask is None: self._mask = isna(self.left) return self._mask @cache_readonly def _engine(self): return IntervalTree(self.left, self.right, closed=self.closed) @property def _constructor(self): return type(self).from_intervals def __contains__(self, key): """ return a boolean if this key is IN the index We *only* accept an Interval Parameters ---------- key : Interval Returns ------- boolean """ if not isinstance(key, Interval): return False try: self.get_loc(key) return True except KeyError: return False def contains(self, key): """ return a boolean if this key is IN the index We accept / allow keys to be not *just* actual objects. Parameters ---------- key : int, float, Interval Returns ------- boolean """ try: self.get_loc(key) return True except KeyError: return False @classmethod def from_breaks(cls, breaks, closed='right', name=None, copy=False): """ Construct an IntervalIndex from an array of splits Parameters ---------- breaks : array-like (1-dimensional) Left and right bounds for each interval. closed : {'left', 'right', 'both', 'neither'}, optional Whether the intervals are closed on the left-side, right-side, both or neither. Defaults to 'right'. name : object, optional Name to be stored in the index. copy : boolean, default False copy the data Examples -------- >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3]) IntervalIndex([(0, 1], (1, 2], (2, 3]] closed='right', dtype='interval[int64]') See Also -------- interval_range : Function to create a fixed frequency IntervalIndex IntervalIndex.from_arrays : Construct an IntervalIndex from a left and right array IntervalIndex.from_intervals : Construct an IntervalIndex from an array of Interval objects IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of tuples """ breaks = np.asarray(breaks) return cls.from_arrays(breaks[:-1], breaks[1:], closed, name=name, copy=copy) @classmethod def from_arrays(cls, left, right, closed='right', name=None, copy=False): """ Construct an IntervalIndex from a a left and right array Parameters ---------- left : array-like (1-dimensional) Left bounds for each interval. right : array-like (1-dimensional) Right bounds for each interval. closed : {'left', 'right', 'both', 'neither'}, optional Whether the intervals are closed on the left-side, right-side, both or neither. Defaults to 'right'. name : object, optional Name to be stored in the index. copy : boolean, default False copy the data Examples -------- >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3]) IntervalIndex([(0, 1], (1, 2], (2, 3]] closed='right', dtype='interval[int64]') See Also -------- interval_range : Function to create a fixed frequency IntervalIndex IntervalIndex.from_breaks : Construct an IntervalIndex from an array of splits IntervalIndex.from_intervals : Construct an IntervalIndex from an array of Interval objects IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of tuples """ left = np.asarray(left) right = np.asarray(right) return cls._simple_new(left, right, closed, name=name, copy=copy, verify_integrity=True) @classmethod def from_intervals(cls, data, name=None, copy=False): """ Construct an IntervalIndex from a 1d array of Interval objects Parameters ---------- data : array-like (1-dimensional) Array of Interval objects. All intervals must be closed on the same sides. name : object, optional Name to be stored in the index. copy : boolean, default False by-default copy the data, this is compat only and ignored Examples -------- >>> pd.IntervalIndex.from_intervals([pd.Interval(0, 1), ... pd.Interval(1, 2)]) IntervalIndex([(0, 1], (1, 2]] closed='right', dtype='interval[int64]') The generic Index constructor work identically when it infers an array of all intervals: >>> pd.Index([pd.Interval(0, 1), pd.Interval(1, 2)]) IntervalIndex([(0, 1], (1, 2]] closed='right', dtype='interval[int64]') See Also -------- interval_range : Function to create a fixed frequency IntervalIndex IntervalIndex.from_arrays : Construct an IntervalIndex from a left and right array IntervalIndex.from_breaks : Construct an IntervalIndex from an array of splits IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of tuples """ data = np.asarray(data) left, right, closed = intervals_to_interval_bounds(data) return cls.from_arrays(left, right, closed, name=name, copy=False) @classmethod def from_tuples(cls, data, closed='right', name=None, copy=False): """ Construct an IntervalIndex from a list/array of tuples Parameters ---------- data : array-like (1-dimensional) Array of tuples closed : {'left', 'right', 'both', 'neither'}, optional Whether the intervals are closed on the left-side, right-side, both or neither. Defaults to 'right'. name : object, optional Name to be stored in the index. copy : boolean, default False by-default copy the data, this is compat only and ignored Examples -------- >>> pd.IntervalIndex.from_tuples([(0, 1), (1,2)]) IntervalIndex([(0, 1], (1, 2]], closed='right', dtype='interval[int64]') See Also -------- interval_range : Function to create a fixed frequency IntervalIndex IntervalIndex.from_arrays : Construct an IntervalIndex from a left and right array IntervalIndex.from_breaks : Construct an IntervalIndex from an array of splits IntervalIndex.from_intervals : Construct an IntervalIndex from an array of Interval objects """ left = [] right = [] for d in data: if isna(d): left.append(np.nan) right.append(np.nan) continue l, r = d left.append(l) right.append(r) # TODO # if we have nulls and we previous had *only* # integer data, then we have changed the dtype return cls.from_arrays(left, right, closed, name=name, copy=False) def to_tuples(self): return Index(com._asarray_tuplesafe(zip(self.left, self.right))) @cache_readonly def _multiindex(self): return MultiIndex.from_arrays([self.left, self.right], names=['left', 'right']) @property def left(self): return self._left @property def right(self): return self._right @property def closed(self): return self._closed def __len__(self): return len(self.left) @cache_readonly def values(self): """ Returns the IntervalIndex's data as a numpy array of Interval objects (with dtype='object') """ left = self.left right = self.right mask = self._isnan closed = self._closed result = np.empty(len(left), dtype=object) for i in range(len(left)): if mask[i]: result[i] = np.nan else: result[i] = Interval(left[i], right[i], closed) return result def __array__(self, result=None): """ the array interface, return my values """ return self.values def __array_wrap__(self, result, context=None): # we don't want the superclass implementation return result def _array_values(self): return self.values def __reduce__(self): d = dict(left=self.left, right=self.right) d.update(self._get_attributes_dict()) return _new_IntervalIndex, (self.__class__, d), None @Appender(_index_shared_docs['copy']) def copy(self, deep=False, name=None): left = self.left.copy(deep=True) if deep else self.left right = self.right.copy(deep=True) if deep else self.right name = name if name is not None else self.name closed = self.closed return type(self).from_arrays(left, right, closed=closed, name=name) @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True): if is_interval_dtype(dtype): if copy: self = self.copy() return self elif is_object_dtype(dtype): return Index(self.values, dtype=object) elif is_categorical_dtype(dtype): from pandas import Categorical return Categorical(self, ordered=True) raise ValueError('Cannot cast IntervalIndex to dtype %s' % dtype) @cache_readonly def dtype(self): return IntervalDtype.construct_from_string(str(self.left.dtype)) @property def inferred_type(self): return 'interval' @Appender(Index.memory_usage.__doc__) def memory_usage(self, deep=False): # we don't use an explict engine # so return the bytes here return (self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep)) @cache_readonly def mid(self): """Returns the mid-point of each interval in the index as an array """ try: return Index(0.5 * (self.left.values + self.right.values)) except TypeError: # datetime safe version delta = self.right.values - self.left.values return Index(self.left.values + 0.5 * delta) @cache_readonly def is_monotonic(self): return self._multiindex.is_monotonic @cache_readonly def is_monotonic_increasing(self): return self._multiindex.is_monotonic_increasing @cache_readonly def is_monotonic_decreasing(self): return self._multiindex.is_monotonic_decreasing @cache_readonly def is_unique(self): return self._multiindex.is_unique @cache_readonly def is_non_overlapping_monotonic(self): # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... ) # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...) # we already require left <= right # strict inequality for closed == 'both'; equality implies overlapping # at a point when both sides of intervals are included if self.closed == 'both': return bool((self.right[:-1] < self.left[1:]).all() or (self.left[:-1] > self.right[1:]).all()) # non-strict inequality when closed != 'both'; at least one side is # not included in the intervals, so equality does not imply overlapping return bool((self.right[:-1] <= self.left[1:]).all() or (self.left[:-1] >= self.right[1:]).all()) @Appender(_index_shared_docs['_convert_scalar_indexer']) def _convert_scalar_indexer(self, key, kind=None): if kind == 'iloc': return super(IntervalIndex, self)._convert_scalar_indexer( key, kind=kind) return key def _maybe_cast_slice_bound(self, label, side, kind): return getattr(self, side)._maybe_cast_slice_bound(label, side, kind) @Appender(_index_shared_docs['_convert_list_indexer']) def _convert_list_indexer(self, keyarr, kind=None): """ we are passed a list-like indexer. Return the indexer for matching intervals. """ locs = self.get_indexer_for(keyarr) # we have missing values if (locs == -1).any(): raise KeyError return locs def _maybe_cast_indexed(self, key): """ we need to cast the key, which could be a scalar or an array-like to the type of our subtype """ if isinstance(key, IntervalIndex): return key subtype = self.dtype.subtype if is_float_dtype(subtype): if is_integer(key): key = float(key) elif isinstance(key, (np.ndarray, Index)): key = key.astype('float64') elif is_integer_dtype(subtype): if is_integer(key): key = int(key) return key def _check_method(self, method): if method is None: return if method in ['bfill', 'backfill', 'pad', 'ffill', 'nearest']: raise NotImplementedError( 'method {} not yet implemented for ' 'IntervalIndex'.format(method)) raise ValueError("Invalid fill method") def _searchsorted_monotonic(self, label, side, exclude_label=False): if not self.is_non_overlapping_monotonic: raise KeyError('can only get slices from an IntervalIndex if ' 'bounds are non-overlapping and all monotonic ' 'increasing or decreasing') if isinstance(label, IntervalMixin): raise NotImplementedError if ((side == 'left' and self.left.is_monotonic_increasing) or (side == 'right' and self.left.is_monotonic_decreasing)): sub_idx = self.right if self.open_right or exclude_label: label = _get_next_label(label) else: sub_idx = self.left if self.open_left or exclude_label: label = _get_prev_label(label) return sub_idx._searchsorted_monotonic(label, side) def _get_loc_only_exact_matches(self, key): if isinstance(key, Interval): if not self.is_unique: raise ValueError("cannot index with a slice Interval" " and a non-unique index") # TODO: this expands to a tuple index, see if we can # do better return Index(self._multiindex.values).get_loc(key) raise KeyError def _find_non_overlapping_monotonic_bounds(self, key): if isinstance(key, IntervalMixin): start = self._searchsorted_monotonic( key.left, 'left', exclude_label=key.open_left) stop = self._searchsorted_monotonic( key.right, 'right', exclude_label=key.open_right) elif isinstance(key, slice): # slice start, stop = key.start, key.stop if (key.step or 1) != 1: raise NotImplementedError("cannot slice with a slice step") if start is None: start = 0 else: start = self._searchsorted_monotonic(start, 'left') if stop is None: stop = len(self) else: stop = self._searchsorted_monotonic(stop, 'right') else: # scalar or index-like start = self._searchsorted_monotonic(key, 'left') stop = self._searchsorted_monotonic(key, 'right') return start, stop def get_loc(self, key, method=None): """Get integer location, slice or boolean mask for requested label. Parameters ---------- key : label method : {None}, optional * default: matches where the label is within an interval only. Returns ------- loc : int if unique index, slice if monotonic index, else mask Examples --------- >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2) >>> index = pd.IntervalIndex.from_intervals([i1, i2]) >>> index.get_loc(1) 0 You can also supply an interval or an location for a point inside an interval. >>> index.get_loc(pd.Interval(0, 2)) array([0, 1], dtype=int64) >>> index.get_loc(1.5) 1 If a label is in several intervals, you get the locations of all the relevant intervals. >>> i3 = pd.Interval(0, 2) >>> overlapping_index = pd.IntervalIndex.from_intervals([i2, i3]) >>> overlapping_index.get_loc(1.5) array([0, 1], dtype=int64) """ self._check_method(method) original_key = key key = self._maybe_cast_indexed(key) if self.is_non_overlapping_monotonic: if isinstance(key, Interval): left = self._maybe_cast_slice_bound(key.left, 'left', None) right = self._maybe_cast_slice_bound(key.right, 'right', None) key = Interval(left, right, key.closed) else: key = self._maybe_cast_slice_bound(key, 'left', None) start, stop = self._find_non_overlapping_monotonic_bounds(key) if start is None or stop is None: return slice(start, stop) elif start + 1 == stop: return start elif start < stop: return slice(start, stop) else: raise KeyError(original_key) else: # use the interval tree if isinstance(key, Interval): left, right = _get_interval_closed_bounds(key) return self._engine.get_loc_interval(left, right) else: return self._engine.get_loc(key) def get_value(self, series, key): if com.is_bool_indexer(key): loc = key elif is_list_like(key): loc = self.get_indexer(key) elif isinstance(key, slice): if not (key.step is None or key.step == 1): raise ValueError("cannot support not-default " "step in a slice") try: loc = self.get_loc(key) except TypeError: # we didn't find exact intervals # or are non-unique raise ValueError("unable to slice with " "this key: {}".format(key)) else: loc = self.get_loc(key) return series.iloc[loc] @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): self._check_method(method) target = _ensure_index(target) target = self._maybe_cast_indexed(target) if self.equals(target): return np.arange(len(self), dtype='intp') if self.is_non_overlapping_monotonic: start, stop = self._find_non_overlapping_monotonic_bounds(target) start_plus_one = start + 1 if not ((start_plus_one < stop).any()): return np.where(start_plus_one == stop, start, -1) if not self.is_unique: raise ValueError("cannot handle non-unique indices") # IntervalIndex if isinstance(target, IntervalIndex): indexer = self._get_reindexer(target) # non IntervalIndex else: indexer = np.concatenate([self.get_loc(i) for i in target]) return _ensure_platform_int(indexer) def _get_reindexer(self, target): """ Return an indexer for a target IntervalIndex with self """ # find the left and right indexers lindexer = self._engine.get_indexer(target.left.values) rindexer = self._engine.get_indexer(target.right.values) # we want to return an indexer on the intervals # however, our keys could provide overlapping of multiple # intervals, so we iterate thru the indexers and construct # a set of indexers indexer = [] n = len(self) for i, (l, r) in enumerate(zip(lindexer, rindexer)): target_value = target[i] # matching on the lhs bound if (l != -1 and self.closed == 'right' and target_value.left == self[l].right): l += 1 # matching on the lhs bound if (r != -1 and self.closed == 'left' and target_value.right == self[r].left): r -= 1 # not found if l == -1 and r == -1: indexer.append(np.array([-1])) elif r == -1: indexer.append(np.arange(l, n)) elif l == -1: # care about left/right closed here value = self[i] # target.closed same as self.closed if self.closed == target.closed: if target_value.left < value.left: indexer.append(np.array([-1])) continue # target.closed == 'left' elif self.closed == 'right': if target_value.left <= value.left: indexer.append(np.array([-1])) continue # target.closed == 'right' elif self.closed == 'left': if target_value.left <= value.left: indexer.append(np.array([-1])) continue indexer.append(np.arange(0, r + 1)) else: indexer.append(np.arange(l, r + 1)) return np.concatenate(indexer) @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs) def get_indexer_non_unique(self, target): target = self._maybe_cast_indexed(_ensure_index(target)) return super(IntervalIndex, self).get_indexer_non_unique(target) @Appender(_index_shared_docs['where']) def where(self, cond, other=None): if other is None: other = self._na_value values = np.where(cond, self.values, other) return self._shallow_copy(values) def delete(self, loc): new_left = self.left.delete(loc) new_right = self.right.delete(loc) return self._shallow_copy(new_left, new_right) def insert(self, loc, item): if not isinstance(item, Interval): raise ValueError('can only insert Interval objects into an ' 'IntervalIndex') if not item.closed == self.closed: raise ValueError('inserted item must be closed on the same side ' 'as the index') new_left = self.left.insert(loc, item.left) new_right = self.right.insert(loc, item.right) return self._shallow_copy(new_left, new_right) def _as_like_interval_index(self, other, error_msg): self._assert_can_do_setop(other) other = _ensure_index(other) if (not isinstance(other, IntervalIndex) or self.closed != other.closed): raise ValueError(error_msg) return other def _concat_same_dtype(self, to_concat, name): """ assert that we all have the same .closed we allow a 0-len index here as well """ if not len(set([i.closed for i in to_concat if len(i)])) == 1: msg = ('can only append two IntervalIndex objects ' 'that are closed on the same side') raise ValueError(msg) return super(IntervalIndex, self)._concat_same_dtype(to_concat, name) @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) indices = _ensure_platform_int(indices) left, right = self.left, self.right if fill_value is None: fill_value = self._na_value mask = indices == -1 if not mask.any(): # we won't change dtype here in this case # if we don't need allow_fill = False taker = lambda x: x.take(indices, allow_fill=allow_fill, fill_value=fill_value) try: new_left = taker(left) new_right = taker(right) except ValueError: # we need to coerce; migth have NA's in an # integer dtype new_left = taker(left.astype(float)) new_right = taker(right.astype(float)) return self._shallow_copy(new_left, new_right) def __getitem__(self, value): mask = self._isnan[value] if is_scalar(mask) and mask: return self._na_value left = self.left[value] right = self.right[value] # scalar if not isinstance(left, Index): return Interval(left, right, self.closed) return self._shallow_copy(left, right) # __repr__ associated methods are based on MultiIndex def _format_with_header(self, header, **kwargs): return header + list(self._format_native_types(**kwargs)) def _format_native_types(self, na_rep='', quoting=None, **kwargs): """ actually format my specific types """ from pandas.io.formats.format import IntervalArrayFormatter return IntervalArrayFormatter(values=self, na_rep=na_rep, justify='all').get_result() def _format_data(self, name=None): # TODO: integrate with categorical and make generic # name argument is unused here; just for compat with base / categorical n = len(self) max_seq_items = min((get_option( 'display.max_seq_items') or n) // 10, 10) formatter = str if n == 0: summary = '[]' elif n == 1: first = formatter(self[0]) summary = '[{}]'.format(first) elif n == 2: first = formatter(self[0]) last = formatter(self[-1]) summary = '[{}, {}]'.format(first, last) else: if n > max_seq_items: n = min(max_seq_items // 2, 10) head = [formatter(x) for x in self[:n]] tail = [formatter(x) for x in self[-n:]] summary = '[{} ... {}]'.format(', '.join(head), ', '.join(tail)) else: head = [] tail = [formatter(x) for x in self] summary = '[{}]'.format(', '.join(tail)) return summary + self._format_space() def _format_attrs(self): attrs = [('closed', repr(self.closed))] if self.name is not None: attrs.append(('name', default_pprint(self.name))) attrs.append(('dtype', "'%s'" % self.dtype)) return attrs def _format_space(self): return "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) def argsort(self, *args, **kwargs): return np.lexsort((self.right, self.left)) def equals(self, other): if self.is_(other): return True # if we can coerce to an II # then we can compare if not isinstance(other, IntervalIndex): if not is_interval_dtype(other): return False other = Index(getattr(other, '.values', other)) return (self.left.equals(other.left) and self.right.equals(other.right) and self.closed == other.closed) def _setop(op_name): def func(self, other): msg = ('can only do set operations between two IntervalIndex ' 'objects that are closed on the same side') other = self._as_like_interval_index(other, msg) result = getattr(self._multiindex, op_name)(other._multiindex) result_name = self.name if self.name == other.name else None return type(self).from_tuples(result.values, closed=self.closed, name=result_name) return func union = _setop('union') intersection = _setop('intersection') difference = _setop('difference') symmetric_differnce = _setop('symmetric_difference') # TODO: arithmetic operations IntervalIndex._add_logical_methods_disabled() def _is_valid_endpoint(endpoint): """helper for interval_range to check if start/end are valid types""" return any([is_number(endpoint), isinstance(endpoint, Timestamp), isinstance(endpoint, Timedelta), endpoint is None]) def _is_type_compatible(a, b): """helper for interval_range to check type compat of start/end/freq""" is_ts_compat = lambda x: isinstance(x, (Timestamp, DateOffset)) is_td_compat = lambda x: isinstance(x, (Timedelta, DateOffset)) return ((is_number(a) and is_number(b)) or (is_ts_compat(a) and is_ts_compat(b)) or (is_td_compat(a) and is_td_compat(b)) or com._any_none(a, b)) def interval_range(start=None, end=None, periods=None, freq=None, name=None, closed='right'): """ Return a fixed frequency IntervalIndex Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals end : numeric or datetime-like, default None Right bound for generating intervals periods : integer, default None Number of periods to generate freq : numeric, string, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' (calendar daily) for datetime-like. name : string, default None Name of the resulting IntervalIndex closed : string, default 'right' options are: 'left', 'right', 'both', 'neither' Notes ----- Of the three parameters: ``start``, ``end``, and ``periods``, exactly two must be specified. Returns ------- rng : IntervalIndex Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5) IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]] closed='right', dtype='interval[int64]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), end=pd.Timestamp('2017-01-04')) IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]] closed='right', dtype='interval[datetime64[ns]]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]] closed='right', dtype='interval[float64]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), periods=3, freq='MS') IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]] closed='right', dtype='interval[datetime64[ns]]') The ``closed`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, closed='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]] closed='both', dtype='interval[int64]') See Also -------- IntervalIndex : an Index of intervals that are all closed on the same side. """ if com._count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') start = com._maybe_box_datetimelike(start) end = com._maybe_box_datetimelike(end) endpoint = next(com._not_none(start, end)) if not _is_valid_endpoint(start): msg = 'start must be numeric or datetime-like, got {start}' raise ValueError(msg.format(start=start)) if not _is_valid_endpoint(end): msg = 'end must be numeric or datetime-like, got {end}' raise ValueError(msg.format(end=end)) if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) freq = freq or (1 if is_number(endpoint) else 'D') if not is_number(freq): try: freq = to_offset(freq) except ValueError: raise ValueError('freq must be numeric or convertible to ' 'DateOffset, got {freq}'.format(freq=freq)) # verify type compatibility if not all([_is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq)]): raise TypeError("start, end, freq need to be type compatible") if is_number(endpoint): if periods is None: periods = int((end - start) // freq) if start is None: start = end - periods * freq # force end to be consistent with freq (lower if freq skips over end) end = start + periods * freq # end + freq for inclusive endpoint breaks = np.arange(start, end + freq, freq) elif isinstance(endpoint, Timestamp): # add one to account for interval endpoints (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks = date_range(start=start, end=end, periods=periods, freq=freq) else: # add one to account for interval endpoints (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=closed)