Skip to content
This repository has been archived by the owner on Mar 8, 2024. It is now read-only.

Commit

Permalink
23038: try to enhance slightly the SMS 7-bit encoding / decoding
Browse files Browse the repository at this point in the history
  • Loading branch information
p1-bmu committed Nov 8, 2023
1 parent 788ed0f commit 88ec5ac
Showing 1 changed file with 30 additions and 13 deletions.
43 changes: 30 additions & 13 deletions pycrate_mobile/TS23038.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# * Authors : Benoit Michau
# *--------------------------------------------------------
#*/

'''
__all__ = [
'DCS_7B',
'DCS_8B',
Expand All @@ -38,13 +38,15 @@
'encode_7b_cbs',
'decode_7b_cbs'
]
'''
from binascii import hexlify

#------------------------------------------------------------------------------#
# 3GPP TS 23.038: Alphabets and language-specific information
# release 13 (d00)
#------------------------------------------------------------------------------#

from pycrate_core.utils import pack_val, TYPE_UINT, PycrateErr
from pycrate_core.utils import pack_val, TYPE_UINT, PycrateErr, bytes_lshift
from pycrate_core.charpy import Charpy
from pycrate_core.elt import Envelope
from pycrate_core.base import Uint
Expand Down Expand Up @@ -548,6 +550,8 @@ def _set_cla_dic(self):

def encode_7b(txt, off=0):
"""translates the unicode string `txt' to a GSM 7 bit characters buffer
Enables the `txt' string to start at a non-null offset `off' as it is the case
with fill bits after certain SMS User-Data-Headers
Args:
txt (utf8 str): text string to encode
Expand All @@ -566,19 +570,20 @@ def encode_7b(txt, off=0):
except KeyError:
raise(PycrateErr('invalid GSM 7 bit char: %r' % c))
else:
# add an escape char
# add the extension escape char
arr.append( (TYPE_UINT, 27, 7) )
cnt += 2
else:
cnt += 1
# check the length in bits and add padding bits
pad = ((8-(7*len(arr)+off)%8)%8)
arr.insert(0, (TYPE_UINT, 0, pad))
# check the length in bits and add fill bits (for the UDH)
arr.insert(0, (TYPE_UINT, 0, off))
return bytes(reversed(pack_val(*arr)[0])), cnt


def decode_7b(buf, off=0):
"""translates the GSM 7 bit characters buffer `buf' to an unicode string
Enables the string to be decoded to start at a non-null offset `off' as it is
the case with fill bits after certain SMS User-Data-Headers
Args:
buf (bytes): buffer to decode
Expand All @@ -587,26 +592,38 @@ def decode_7b(buf, off=0):
Returns:
decoded text string (utf8 str)
"""
char = Charpy(bytes(reversed(buf)))
# jump over the padding bits
# WNG: in case of 7 bits padding, we will have an @ at the end
chars_num = (8*len(buf)-off) // 7
char._cur = (8*len(buf)-off)-(7*chars_num)
# WNG: in case of 7 remaining bits at the end of the buffer, we will have an @
# at the end of the returned string
# we would need the chars_num parameter passed as function arg to fix this
#
char = Charpy(bytes(reversed(buf)))
#
# get the size and number of characters
buf_bl = len(buf) << 3
chars_bl = buf_bl - off
chars_num = chars_bl // 7
# jump over the fill bits (from the UDH)
char._cur = chars_bl - (7 * chars_num)
#
# get all chars
arr = [char.get_uint(7) for i in range(chars_num)]
chars = []
#
for i, v in enumerate(arr):
if v == 27:
# escape char, replace last char with extended content
try:
chars[-1] = _GSM7bExtLUT[arr[i-1]]
except:
except KeyError:
chars.append(u' ')
else:
chars.append(_GSM7bLUT[v])
return u''.join(reversed(chars))

# TODO: we should test more thoroughly the 7b codec when a UDH is present and creates a misalignment
# Here are some wireshark filters to collect such kind of SMS
# gsm_sms.tp-dcs == 0 and gsm_sms.dis_field_udh.user_data_header_length != 6 and gsm_sms.ie_identifier != 0x00
# gsm_sms.dis_field_udh.gsm.fill_bits == 0x0 and gsm_sms.ie_identifier != 0x00


def encode_7b_cbs(txt):
"""translates the unicode string `txt' into a tuple of page(s)
Expand Down

0 comments on commit 88ec5ac

Please sign in to comment.