# Source code for amazon.ion.equivalence

# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at:
#
#    http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
# OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the
# License.

"""Provides utilities for determining whether two objects are equivalent under the Ion data model."""

# Python 2/3 compatibility
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import struct
from datetime import datetime
from decimal import Decimal
from math import isnan

import six

from amazon.ion.core import IonType, Timestamp, TimestampPrecision, MICROSECOND_PRECISION, OffsetTZInfo, Multimap
from amazon.ion.simple_types import _IonNature, IonPyList, IonPyDict, IonPyTimestamp, IonPyNull, IonPySymbol, \
    IonPyText, IonPyDecimal, IonPyFloat
from amazon.ion.symbols import SymbolToken


def ion_equals(a, b, timestamps_instants_only=False):
    """Tests two objects for equivalence under the Ion data model.

    The outcome depends on which operands carry Ion metadata (`ion_type` and `annotations`):

    * Neither operand carries it: the result is True only when both values are equivalent under the Ion
      data model.
    * Exactly one operand carries it: the result is True only when that operand has no annotations and its
      value is equivalent to the other operand under the Ion data model.
    * Both operands carry it: the result is True only when their ion_type and annotations are the same and
      their values are equivalent under the Ion data model.

    The order of the operands does not matter.

    Args:
        a (object): The first operand.
        b (object): The second operand.
        timestamps_instants_only (Optional[bool]): False if timestamp objects (datetime and its subclasses)
            should be compared according to the Ion data model (where the instant, precision, and offset must
            be equal); True if these objects should be considered equivalent if they simply represent the same
            instant.

    Returns:
        The result of the selected comparison: True when the operands are equivalent, False otherwise.
    """
    # Pick the comparison flavour once; each flavour passes itself down for nested values.
    compare = _ion_equals_timestamps_instants if timestamps_instants_only else _ion_equals_timestamps_data_model
    return compare(a, b)
def _ion_equals_timestamps_instants(a, b):
    """Compares a and b, treating timestamps as equivalent when they represent the same instant."""
    return _ion_equals(a, b, _timestamp_instants_eq, _ion_equals_timestamps_instants)


def _ion_equals_timestamps_data_model(a, b):
    """Compares a and b, comparing timestamps according to the Ion data model."""
    return _ion_equals(a, b, _timestamps_eq, _ion_equals_timestamps_data_model)


def _ion_equals(a, b, timestamp_comparison_func, recursive_comparison_func):
    """Compares a and b according to the description of the ion_equals method.

    Args:
        a (object): The first operand.
        b (object): The second operand.
        timestamp_comparison_func (Callable): Two-argument function used to compare timestamp operands.
        recursive_comparison_func (Callable): Two-argument function handed to the sequence and struct helpers
            to compare nested values.

    Returns:
        True if the operands are equivalent; otherwise False. When no type-specific rule applies, the result
        of ``a == b`` is returned as-is.
    """
    for a, b in ((a, b), (b, a)):  # Ensures that operand order does not matter.
        if isinstance(a, _IonNature):
            if isinstance(b, _IonNature):
                # Both operands have _IonNature. Their IonTypes and annotations must be equivalent.
                eq = a.ion_type is b.ion_type and _annotations_eq(a, b)
            else:
                # Only one operand has _IonNature. It cannot be equivalent to the other operand if it has annotations.
                eq = not a.ion_annotations
            if eq:
                if isinstance(a, IonPyList):
                    return _sequences_eq(a, b, recursive_comparison_func)
                elif isinstance(a, IonPyDict):
                    return _structs_eq(a, b, recursive_comparison_func)
                elif isinstance(a, IonPyTimestamp):
                    return timestamp_comparison_func(a, b)
                elif isinstance(a, IonPyNull):
                    return isinstance(b, IonPyNull) or (b is None and a.ion_type is IonType.NULL)
                elif isinstance(a, IonPySymbol) or (isinstance(a, IonPyText) and a.ion_type is IonType.SYMBOL):
                    return _symbols_eq(a, b)
                elif isinstance(a, IonPyDecimal):
                    return _decimals_eq(a, b)
                elif isinstance(a, IonPyFloat):
                    return _floats_eq(a, b)
                else:
                    return a == b
            return False
    # Reaching this point means that neither operand has _IonNature.
    for a, b in ((a, b), (b, a)):  # Ensures that operand order does not matter.
        if isinstance(a, list):
            return _sequences_eq(a, b, recursive_comparison_func)
        elif isinstance(a, dict):
            return _structs_eq(a, b, recursive_comparison_func)
        elif isinstance(a, datetime):
            return timestamp_comparison_func(a, b)
        elif isinstance(a, SymbolToken):
            return _symbols_eq(a, b)
        elif isinstance(a, Decimal):
            return _decimals_eq(a, b)
        elif isinstance(a, float):
            return _floats_eq(a, b)
    return a == b


def _annotations_eq(a, b):
    """Compares the ion_annotations of a and b element-wise, as symbols."""
    return _sequences_eq(a.ion_annotations, b.ion_annotations, _symbols_eq)


def _sequences_eq(a, b, comparison_func):
    """Compares two sequences element-by-element, in order.

    Args:
        a (list|tuple): The first sequence.
        b (object): The second operand; anything other than a list or tuple is never equivalent.
        comparison_func (Callable): Two-argument function used to compare elements at the same position.

    Returns:
        bool: True if both are sequences of the same length whose corresponding elements all compare as
        equivalent.
    """
    assert isinstance(a, (list, tuple))
    if not isinstance(b, (list, tuple)):
        return False
    if len(a) != len(b):
        return False
    return all(comparison_func(item_a, item_b) for item_a, item_b in zip(a, b))


def _structs_eq(a, b, comparison_func):
    """Compares two structs (dicts or Multimaps) irrespective of field order.

    Args:
        a (dict|Multimap): The first struct.
        b (object): The second operand; anything other than a dict or Multimap is never equivalent.
        comparison_func (Callable): Two-argument function used to compare field values.

    Returns:
        bool: True if both operands have the same length and every key maps to equivalent values. When both
        operands are Multimaps, the values stored under a key are compared as multisets: each value on one
        side must pair with a distinct equivalent value on the other side.
    """
    assert isinstance(a, (dict, Multimap))
    if not isinstance(b, (dict, Multimap)):
        return False
    if len(a) != len(b):
        return False
    for a, b in ((a, b), (b, a)):  # Checked in both directions so neither side has keys the other lacks.
        both_multimaps = isinstance(a, Multimap) and isinstance(b, Multimap)
        for key in six.iterkeys(a):
            if key not in b:
                return False
            if both_multimaps:
                values_a = a.get_all_values(key)
                # Work on a copy so matched values can be consumed without touching b, in case
                # get_all_values hands back the multimap's own storage.
                unmatched_b = list(b.get_all_values(key))
                if len(values_a) != len(unmatched_b):
                    return False
                for value_a in values_a:
                    # Each value of b may satisfy only one value of a; otherwise [1, 1, 2] would be
                    # considered equivalent to [1, 2, 2].
                    for index, value_b in enumerate(unmatched_b):
                        if comparison_func(value_a, value_b):
                            del unmatched_b[index]
                            break
                    else:
                        return False
            elif not comparison_func(a[key], b[key]):
                return False
    return True


def _timestamps_eq(a, b):
    """Compares two timestamp operands for equivalence under the Ion data model.

    Args:
        a (datetime): The first timestamp.
        b (object): The second operand; anything other than a datetime is never equivalent.

    Returns:
        bool: True only if the operands have equivalent local offsets, compatible precisions, and compare
        equal as datetimes.
    """
    assert isinstance(a, datetime)
    if not isinstance(b, datetime):
        return False
    # Local offsets must be equivalent.
    if (a.tzinfo is None) ^ (b.tzinfo is None):
        return False
    if a.utcoffset() != b.utcoffset():
        return False
    for a, b in ((a, b), (b, a)):
        if isinstance(a, Timestamp):
            if isinstance(b, Timestamp):
                # Both operands declare their precisions. They are only equivalent if their precisions are the same.
                # fractional_precision is compared by value (as in the MICROSECOND_PRECISION check below);
                # an identity test would depend on how the interpreter happens to cache objects.
                if a.precision is b.precision and a.fractional_precision == b.fractional_precision \
                        and a.fractional_seconds == b.fractional_seconds:
                    break
                return False
            elif a.precision is not TimestampPrecision.SECOND or a.fractional_precision != MICROSECOND_PRECISION:
                # Only one of the operands declares its precision. It is only equivalent to the other (a naive datetime)
                # if it has full microseconds precision.
                return False
    return a == b


def _timestamp_instants_eq(a, b):
    """Compares two timestamp operands for point-in-time equivalence only.

    Args:
        a (datetime): The first timestamp.
        b (object): The second operand; anything other than a datetime is never equivalent.

    Returns:
        bool: True if both operands represent the same instant, regardless of offset or precision.
    """
    assert isinstance(a, datetime)
    if not isinstance(b, datetime):
        return False
    # datetime's __eq__ can't compare a None offset and a non-None offset. For these equivalence semantics, a None
    # offset (unknown local offset) is treated equivalently to a +00:00.
    if a.tzinfo is None:
        a = a.replace(tzinfo=OffsetTZInfo())
    if b.tzinfo is None:
        b = b.replace(tzinfo=OffsetTZInfo())
    # datetime's __eq__ implementation compares instants; offsets and precision need not be equal.
    return a == b


def _symbols_eq(a, b):
    """Compares two symbol operands (text or SymbolToken) for equivalence.

    Args:
        a (six.text_type|SymbolToken): The first symbol.
        b (object): The second operand; anything other than text or a SymbolToken is never equivalent.

    Returns:
        bool: True if the symbols have the same text, or both have unknown text and refer to the same
        symbol as determined by their import locations and SIDs.

    Raises:
        ValueError: If both symbols have unknown text and either of them has no SID.
    """
    assert isinstance(a, (six.text_type, SymbolToken))
    if not isinstance(b, (six.text_type, SymbolToken)):
        return False
    # Plain text operands have no 'text' attribute and stand for themselves.
    a_text = getattr(a, 'text', a)
    b_text = getattr(b, 'text', b)
    if a_text != b_text:
        return False
    if a_text is None:
        # Both have unknown text. If they come from a local context, they are equivalent.
        a_location = getattr(a, 'location', None)
        b_location = getattr(b, 'location', None)
        if (a_location is None) ^ (b_location is None):
            return False
        if a_location is not None:
            # Both were imported from shared symbol tables. In this case, they are only equivalent if they were
            # imported from the same position in the same shared symbol table.
            if (a_location.name != b_location.name) or (a_location.position != b_location.position):
                return False
        a_sid = getattr(a, 'sid', None)
        b_sid = getattr(b, 'sid', None)
        if a_sid is None or b_sid is None:
            raise ValueError('Attempted to compare malformed symbols %s, %s.' % (a, b))
        if (a_sid == 0) ^ (b_sid == 0):
            # SID 0 is only equal to SID 0.
            return False
    return True


def _decimals_eq(a, b):
    """Compares two decimal operands, distinguishing negative zero and differing precision.

    Args:
        a (Decimal): The first decimal.
        b (object): The second operand; anything other than a Decimal is never equivalent.

    Returns:
        bool: True if both operands have the same sign, value, and precision.
    """
    assert isinstance(a, Decimal)
    if not isinstance(b, Decimal):
        return False
    if a.is_zero() and b.is_zero():
        if a.is_signed() ^ b.is_signed():
            # Negative-zero is not equivalent to positive-zero.
            return False
    # This ensures that both have equal precision.
    return a.canonical().compare_total(b.canonical()) == 0


def _is_float_negative_zero(x):
    """Returns True if x packs to the big-endian double bit pattern of negative zero."""
    return struct.pack('>d', x) == b'\x80\x00\x00\x00\x00\x00\x00\x00'


def _floats_eq(a, b):
    """Compares two float operands, distinguishing negative zero and treating nan as equivalent to nan.

    Args:
        a (float): The first float.
        b (object): The second operand; anything other than a float is never equivalent.

    Returns:
        bool: True if the operands are equivalent floats.
    """
    assert isinstance(a, float)
    if not isinstance(b, float):
        return False
    if a == 0 and b == 0:
        # Negative-zero is not equivalent to positive-zero.
        return not (_is_float_negative_zero(a) ^ _is_float_negative_zero(b))
    # nan is always equivalent to nan.
    return a == b or (isnan(a) and isnan(b))