Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix a string reference collision bug with AMF3 #36

Merged
merged 1 commit into from
Jan 22, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpyamf/amf3.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ cdef class ClassDefinition(object):


cdef class Context(codec.Context):
cdef codec.IndexedCollection strings
cdef codec.ByteStringReferenceCollection strings
cdef dict classes
cdef dict class_ref
cdef dict proxied_objects
Expand Down
2 changes: 1 addition & 1 deletion cpyamf/amf3.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ cdef class Context(codec.Context):
"""

def __cinit__(self):
self.strings = codec.IndexedCollection(use_hash=1)
self.strings = codec.ByteStringReferenceCollection()
self.classes = {}
self.class_ref = {}
self.proxied_objects = {}
Expand Down
11 changes: 11 additions & 0 deletions cpyamf/codec.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,17 @@ cdef class IndexedCollection(object):
cpdef Py_ssize_t append(self, object obj) except -1


cdef class ByteStringReferenceCollection(IndexedCollection):
    """
    An ``IndexedCollection`` specialisation for byte string references.

    There have been rare hash collisions within a single AMF payload, causing
    corrupt payloads.

    Which strings collide depends on the Python runtime; each platform might
    have a slightly different implementation, which makes testing extremely
    difficult.
    """


cdef class Context(object):
"""
C based version of ``pyamf.BaseContext``
Expand Down
21 changes: 20 additions & 1 deletion cpyamf/codec.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ cdef class IndexedCollection(object):

return <object>self.data[ref]

cdef inline object _ref(self, object obj):
cdef object _ref(self, object obj):
if self.use_hash:
return hash(obj)

Expand Down Expand Up @@ -198,6 +198,25 @@ cdef class IndexedCollection(object):
return n


cdef class ByteStringReferenceCollection(IndexedCollection):
    """
    An ``IndexedCollection`` specialisation for byte string references.

    There have been rare hash collisions within a single AMF payload, causing
    corrupt payloads. To avoid them, ``_ref`` in this class returns the
    object itself rather than ``hash(obj)``.

    Which strings collide depends on the Python runtime; each platform might
    have a slightly different implementation, which makes testing extremely
    difficult.
    """

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't think the use of `cdef IndexedCollection s = self` in `__richcmp__` will be a problem?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, since they only access properties defined on IndexedCollection (and nothing in ByteStringReferenceCollection).


cdef object _ref(self, object obj):
    # Use the object itself as the reference key. The base
    # ``IndexedCollection._ref`` returns ``hash(obj)`` when ``use_hash`` is
    # set, and distinct strings can share a hash.
    return obj

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very clever way to reduce the amount of code that needs to change.


def __copy__(self):
    """
    Return a newly constructed ``ByteStringReferenceCollection``.

    This method only constructs the new instance; it does not transfer any
    state from ``self`` to it.
    """
    cdef ByteStringReferenceCollection n = ByteStringReferenceCollection()

    return n


cdef class Context(object):
"""
I hold the AMF context for en/decoding streams.
Expand Down
4 changes: 2 additions & 2 deletions pyamf/amf3.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,13 +600,13 @@ class Context(codec.Context):
I hold the AMF3 context for en/decoding streams.

@ivar strings: A list of string references.
@type strings: C{list}
@type strings: L{codec.ByteStringReferenceCollection}
@ivar classes: A list of L{ClassDefinition}.
@type classes: C{list}
"""

def __init__(self):
self.strings = codec.IndexedCollection(use_hash=True)
self.strings = codec.ByteStringReferenceCollection()
self.classes = {}
self.class_ref = {}

Expand Down
34 changes: 28 additions & 6 deletions pyamf/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,30 @@ def __repr__(self):
id(self))


class ByteStringReferenceCollection(IndexedCollection):
    """
    An L{IndexedCollection} whose lookup dict is keyed by the byte string
    itself.

    There have been rare hash collisions within a single AMF payload, causing
    corrupt payloads. Which strings collide depends on the Python runtime;
    each platform might have a slightly different implementation, which makes
    testing extremely difficult.
    """

    def __init__(self, *args, **kwargs):
        # Any arguments are accepted but ignored; the base class is always
        # initialised with C{use_hash=False}.
        parent = super(ByteStringReferenceCollection, self)
        parent.__init__(use_hash=False)

    def getReferenceTo(self, byte_string):
        """
        Return the index recorded for C{byte_string}, or C{-1} if there is
        none.
        """
        index = self.dict.get(byte_string, -1)

        return index

    def append(self, byte_string):
        """
        Add C{byte_string} to the end of the list, record its position in the
        lookup dict and return that position.
        """
        position = len(self.list)

        self.list.append(byte_string)
        self.dict[byte_string] = position

        return position


class Context(object):
"""
The base context for all AMF [de|en]coding.
Expand Down Expand Up @@ -215,13 +239,12 @@ def getStringForBytes(self, s):

@since: 0.6
"""
h = hash(s)
u = self._unicodes.get(h, None)
u = self._unicodes.get(s, None)

if u is not None:
return u

u = self._unicodes[h] = s.decode('utf-8')
u = self._unicodes[s] = s.decode('utf-8')

return u

Expand All @@ -232,13 +255,12 @@ def getBytesForString(self, u):

@since: 0.6
"""
h = hash(u)
s = self._unicodes.get(h, None)
s = self._unicodes.get(u, None)

if s is not None:
return s

s = self._unicodes[h] = u.encode('utf-8')
s = self._unicodes[u] = u.encode('utf-8')

return s

Expand Down