From ed0f6952c4345b677f21903c0c0007fc6ff56544 Mon Sep 17 00:00:00 2001 From: Nick Joyce Date: Wed, 21 Jan 2015 23:11:42 +0000 Subject: [PATCH] Fix a string reference collision bug with AMF3 --- cpyamf/amf3.pxd | 2 +- cpyamf/amf3.pyx | 2 +- cpyamf/codec.pxd | 11 +++++++++++ cpyamf/codec.pyx | 21 ++++++++++++++++++++- pyamf/amf3.py | 4 ++-- pyamf/codec.py | 34 ++++++++++++++++++++++++++++------ 6 files changed, 63 insertions(+), 11 deletions(-) diff --git a/cpyamf/amf3.pxd b/cpyamf/amf3.pxd index 8a1db9b6..a32ef444 100644 --- a/cpyamf/amf3.pxd +++ b/cpyamf/amf3.pxd @@ -20,7 +20,7 @@ cdef class ClassDefinition(object): cdef class Context(codec.Context): - cdef codec.IndexedCollection strings + cdef codec.ByteStringReferenceCollection strings cdef dict classes cdef dict class_ref cdef dict proxied_objects diff --git a/cpyamf/amf3.pyx b/cpyamf/amf3.pyx index 118e7d48..1b587c47 100644 --- a/cpyamf/amf3.pyx +++ b/cpyamf/amf3.pyx @@ -141,7 +141,7 @@ cdef class Context(codec.Context): """ def __cinit__(self): - self.strings = codec.IndexedCollection(use_hash=1) + self.strings = codec.ByteStringReferenceCollection() self.classes = {} self.class_ref = {} self.proxied_objects = {} diff --git a/cpyamf/codec.pxd b/cpyamf/codec.pxd index d779b8fb..3da87ec9 100644 --- a/cpyamf/codec.pxd +++ b/cpyamf/codec.pxd @@ -29,6 +29,17 @@ cdef class IndexedCollection(object): cpdef Py_ssize_t append(self, object obj) except -1 +cdef class ByteStringReferenceCollection(IndexedCollection): + """ + There have been rare hash collisions within a single AMF payload causing + corrupt payloads. + + Which strings cause collisions is dependent on the python runtime, each + platform might have a slightly different implementation which means that + testing is extremely difficult. + """ + + cdef class Context(object): """ C based version of ``pyamf.BaseContext`` diff --git a/cpyamf/codec.pyx b/cpyamf/codec.pyx index 8a1a27d3..7a522aee 100644 --- a/cpyamf/codec.pyx +++ b/cpyamf/codec.pyx @@ -120,7 +120,7 @@ cdef class IndexedCollection(object): return self.data[ref] - cdef inline object _ref(self, object obj): + cdef object _ref(self, object obj): if self.use_hash: return hash(obj) @@ -198,6 +198,25 @@ cdef class IndexedCollection(object): return n +cdef class ByteStringReferenceCollection(IndexedCollection): + """ + There have been rare hash collisions within a single AMF payload causing + corrupt payloads. + + Which strings cause collisions is dependent on the python runtime, each + platform might have a slightly different implementation which means that + testing is extremely difficult. + """ + + cdef object _ref(self, object obj): + return obj + + def __copy__(self): + cdef ByteStringReferenceCollection n = ByteStringReferenceCollection() + + return n + + cdef class Context(object): """ I hold the AMF context for en/decoding streams. diff --git a/pyamf/amf3.py b/pyamf/amf3.py index 06af6f42..9c9ca5d9 100644 --- a/pyamf/amf3.py +++ b/pyamf/amf3.py @@ -600,13 +600,13 @@ class Context(codec.Context): I hold the AMF3 context for en/decoding streams. @ivar strings: A list of string references. - @type strings: C{list} + @type strings: L{codec.ByteStringReferenceCollection} @ivar classes: A list of L{ClassDefinition}. @type classes: C{list} """ def __init__(self): - self.strings = codec.IndexedCollection(use_hash=True) + self.strings = codec.ByteStringReferenceCollection() self.classes = {} self.class_ref = {} diff --git a/pyamf/codec.py b/pyamf/codec.py index e34927db..e51904ae 100644 --- a/pyamf/codec.py +++ b/pyamf/codec.py @@ -120,6 +120,30 @@ def __repr__(self): id(self)) +class ByteStringReferenceCollection(IndexedCollection): + """ + There have been rare hash collisions within a single AMF payload causing + corrupt payloads. + + Which strings cause collisions is dependent on the python runtime, each + platform might have a slightly different implementation which means that + testing is extremely difficult. + """ + + def __init__(self, *args, **kwargs): + super(ByteStringReferenceCollection, self).__init__(use_hash=False) + + def getReferenceTo(self, byte_string): + return self.dict.get(byte_string, -1) + + def append(self, byte_string): + self.list.append(byte_string) + idx = len(self.list) - 1 + self.dict[byte_string] = idx + + return idx + + class Context(object): """ The base context for all AMF [de|en]coding. @@ -215,13 +239,12 @@ def getStringForBytes(self, s): @since: 0.6 """ - h = hash(s) - u = self._unicodes.get(h, None) + u = self._unicodes.get(s, None) if u is not None: return u - u = self._unicodes[h] = s.decode('utf-8') + u = self._unicodes[s] = s.decode('utf-8') return u @@ -232,13 +255,12 @@ def getBytesForString(self, u): @since: 0.6 """ - h = hash(u) - s = self._unicodes.get(h, None) + s = self._unicodes.get(u, None) if s is not None: return s - s = self._unicodes[h] = u.encode('utf-8') + s = self._unicodes[u] = u.encode('utf-8') return s