|
| 1 | +# |
| 2 | +# Copyright (c) 2013 Victor Vasiliev |
| 3 | +# |
| 4 | +# Python client for Project Athena forum system. |
| 5 | +# See LICENSE file for more details. |
| 6 | +# |
| 7 | +# I give absolutely no guarantees that this code will work and that it will |
| 8 | +# not do anything which may lead to accidental corruption or destruction of |
| 9 | +# the data. As you will read the file, you will see why. |
| 10 | +# |
| 11 | + |
| 12 | +# |
| 13 | +# This file implements the protocol for discuss. discuss is the forum service |
| 14 | +# from Project Athena which was intended as a clone of Multics forum |
| 15 | +# application. discuss(1) still refers you to "Multics forum manual" (with a |
| 16 | +# smileyface, which is probably due to the fact that the manpage itself hardly |
| 17 | +# fills half of the normal terminal window size). |
| 18 | +# |
| 19 | +# By 2013, when this comment was written, that forum was used solely for |
| 20 | +# storing mailing list archives. Hence this implementation at the current |
| 21 | +# moment is sufficient only for extracting discussions, but not for posting. |
| 22 | +# |
| 23 | +# The protocol itself is based upon USP: "UNIX Universal Streaming Protocol", |
| 24 | +# which was apparently one of the attempts to create a universal data |
| 25 | +# representation protocol (like XDR, ASN.1, XML, JSON, protobufs, etc) used by |
| 26 | +# the discuss developers because that was a new shiny thing from LCS back in |
| 27 | +# the day. One would guess that since the only implementation of it still in |
| 28 | +# the wild is discuss, the protocol is only used by discuss itself. This is |
| 29 | +# not, however, true. Discuss does not actually use USP: it hijacks into the |
| 30 | +# middle of USP library, copies the parts of the connection code and then uses |
| 31 | +# the USP data representation routines (which are not even exported from that |
| 32 | +# library in header files) without actually doing USP. |
| 33 | +# |
| 34 | +# As I found out (because of the copyright header), the protocol was part of |
| 35 | +# a certain distributed mail system called PCmail, which even has a few RFCs |
| 36 | +# dedicated to it. |
| 37 | +# |
| 38 | +# The USP code is in usp/ tree and the discuss usage of it is in |
| 39 | +# libds/rpcall.c. Note that in Debathena those are compiled as two different |
| 40 | +# static libraries. libds uses usp routines, even though they are not even |
| 41 | +# exported in the header file. Oh, and the whole suite is written in K&R C. |
| 42 | +# |
| 43 | + |
| 44 | +import socket |
| 45 | +from struct import pack, unpack, calcsize |
| 46 | +from functools import partial |
| 47 | + |
| 48 | +from . import constants |
| 49 | + |
class ProtocolError(Exception):
    """Error raised when the discuss/USP exchange cannot proceed.

    Used throughout this module for malformed or truncated data, broken
    connections and Kerberos authentication failures.
    """
| 52 | + |
| 53 | +# Data formats, in their USP names. USP "cardinal" means "unsigned" or something |
| 54 | +# like that (discuss rpcall.c calls it "short", which is more reasonable). |
# Maps each USP primitive type name to the struct format used to encode it.
# All values are big-endian ("!"); USPBlock derives its put_<name> and
# read_<name> accessors from the keys of this table.
_formats = dict(
    boolean="!H",        # Yes, really, bool is two bytes
    integer="!h",
    cardinal="!H",
    long_integer="!i",
    long_cardinal="!I",
)
| 62 | + |
| 63 | +# This is a horrible kludge which I wrote for pymoira and hoped to forget that |
| 64 | +# it exists and that I ever wrote it. Unfortunately, it looks like Moira is not |
| 65 | +# the only Athena service which totally disregards such a nice thing as GSSAPI. |
def _get_krb5_ap_req(service, server):
    """Return the raw Kerberos 5 AP_REQ for a given service.

    A GSSAPI initial context token is obtained through the ``kerberos``
    module for the principal ``service@server``; its GSSAPI framing is then
    cut off and only the bytes following the AP_REQ token identifier are
    returned.

    Parameters:
        service: service name, e.g. "discuss".
        server: host name of the server the ticket is requested for.

    Returns:
        The AP_REQ as a byte string.

    Raises:
        ProtocolError: if the Kerberos library reports a GSS error, or if
            the token it produced does not look like a GSSAPI-wrapped
            AP_REQ (this includes tokens too short to inspect).
    """

    import base64
    import kerberos

    # Only the calls into the Kerberos library can raise GSSError, so the
    # try block is limited to them.
    try:
        _, context = kerberos.authGSSClientInit('%s@%s' % (service, server))
        kerberos.authGSSClientStep(context, "")
        token_gssapi = base64.b64decode(kerberos.authGSSClientResponse(context))
    except kerberos.GSSError as err:
        # err.args[1][0] is the same value the old err[1][0] lookup produced,
        # but does not rely on exceptions being indexable.
        raise ProtocolError("Kerberos authentication error: %s" % err.args[1][0])

    # The following code "parses" GSSAPI token as described in RFC 2743 and
    # RFC 4121. "Parsing" in this context means throwing out the GSSAPI
    # header (because YOLO/IBTSOCS) while doing some very basic validation
    # of whether this is actually what we want.
    #
    # This code is here because Python's interface provides only GSSAPI
    # interface, and discuss does not use GSSAPI. This should be fixed at
    # some point, hopefully through total deprecation of discuss. Thermite
    # involvement is preferred.
    #
    # FIXME: this probably should either parse tokens properly or use
    # another Kerberos bindings for Python. Currently there are no sane
    # Python bindings for krb5 I am aware of. There's krb5 module, which
    # has not only terrible API, but also confusing error messages and
    # useless documentation. Perhaps the only fix is to write proper
    # bindings myself, but this is the yak I am not ready to shave at the
    # moment.

    # 01 00 indicates that this is AP_REQ
    body_start = token_gssapi.find(b"\x01\x00")

    # One-byte slices are compared instead of indexing: a slice of a token
    # that is too short is simply empty, so truncated input is rejected with
    # ProtocolError rather than escaping as IndexError.
    looks_valid = (
        token_gssapi[0:1] == b"\x60"
        and (token_gssapi[2:3] == b"\x06" or token_gssapi[4:5] == b"\x06")
        and 8 <= body_start <= 64
    )
    if not looks_valid:
        raise ProtocolError("Invalid GSSAPI token provided by Python's Kerberos API")

    # Skip the two-byte token identifier itself.
    return token_gssapi[body_start + 2:]
| 103 | + |
class USPBlock(object):
    """A USP block: a 16-bit block type followed by a byte payload.

    The same class is used for building outgoing blocks (put_* methods and
    send()) and for consuming incoming ones (receive() and read_* methods).
    For every entry of the module-level _formats table the instance gets a
    put_<name>(value) and a read_<name>() method, e.g. put_cardinal() and
    read_long_integer().

    Attributes:
        block_type: integer block type sent in front of the payload.
        buffer: payload bytes; put_* methods append to it, read_* methods
            consume it from the front.
    """

    # Maximum payload of a subblock we send (MAX_SUB_BLOCK_LENGTH)
    _MAX_SEND_SUBBLOCK = 508

    # Upper bound accepted for an incoming subblock. Deliberately larger
    # than the sending limit because some of the code suggests that blocks
    # larger than 512 bytes may actually exist.
    _MAX_RECV_SUBBLOCK = 4096

    def __init__(self, block_type):
        # Create read_* and put_* methods
        for name, fmt in _formats.items():
            setattr(self, "put_" + name, partial(self.put_data, fmt))
            setattr(self, "read_" + name, partial(self.read_data, fmt))

        self.buffer = b""
        self.block_type = block_type

    def put_data(self, fmt, s):
        """Pack the value s with struct format fmt and append it."""

        self.buffer += pack(fmt, s)

    def put_string(self, s):
        """Append a length-prefixed string to the buffer.

        s may be a byte string or text; text is encoded as UTF-8 before
        being written. Raises ProtocolError if s contains a NUL byte.
        """

        raw = s if isinstance(s, bytes) else s.encode("utf-8")

        if b"\0" in raw:
            raise ProtocolError("Null characeters are not allowed in USP")

        # "\n" is translated to "\r\n", and "\r" to "\r\0". Because we can. Or
        # because that seemed like a nice cross-platform feature. Or for weird
        # technical reasons from 1980s I do not really want to know. This
        # works out because input is null-terminated and wire format has the
        # length specified. The "\r" replacement has to run first, otherwise
        # it would also rewrite the "\r" produced for each "\n".
        encoded = raw.replace(b"\r", b"\r\0").replace(b"\n", b"\r\n")
        self.put_cardinal(len(encoded))
        self.buffer += encoded

        # Pad to an even number of bytes
        if len(encoded) % 2 == 1:
            self.buffer += b"\0"

    def send(self, sock):
        """Send the block over a socket.

        The block type goes first as a 16-bit value, then the payload split
        into subblocks. At least one subblock is always sent, even when the
        payload is empty.
        """

        sock.sendall(pack("!H", self.block_type))

        # Each block is fragmented into subblocks with a 16-bit header
        payload = self.buffer
        total = len(payload)
        offset = 0
        while True:
            chunk = payload[offset:offset + self._MAX_SEND_SUBBLOCK]
            offset += len(chunk)
            last = offset >= total

            # Header is length of the subblock + last block marker
            header_number = len(chunk) + 2  # Length + header size
            if last:
                header_number |= 0x8000

            sock.sendall(pack("!H", header_number) + chunk)
            if last:
                break

    def read_data(self, fmt):
        """Consume and return one value described by struct format fmt.

        Raises ProtocolError if the buffer holds fewer bytes than fmt needs.
        """

        size = calcsize(fmt)
        if len(self.buffer) < size:
            raise ProtocolError("Invalid data received from the client (block is too short)")

        data, self.buffer = self.buffer[0:size], self.buffer[size:]
        unpacked, = unpack(fmt, data)
        return unpacked

    def read_string(self):
        """Consume a length-prefixed string and return it as bytes.

        The line-ending translation applied by put_string() is undone.
        Raises ProtocolError if the buffer is shorter than the declared
        length.
        """

        size = self.read_cardinal()

        if len(self.buffer) < size:
            raise ProtocolError("Invalid data received from the client (block is too short)")

        # Odd-length strings are followed by one padding byte
        omit = size + 1 if size % 2 == 1 else size
        encoded, self.buffer = self.buffer[0:size], self.buffer[omit:]

        return encoded.replace(b"\r\n", b"\n").replace(b"\r\0", b"\r")

    @staticmethod
    def _recv_exact(sock, size):
        """Read exactly size bytes from sock (nothing if size <= 0).

        recv() is allowed to return less than requested, so it is called
        until enough data has arrived. Raises ProtocolError if the peer
        closes the connection first.
        """

        received = b""
        while len(received) < size:
            chunk = sock.recv(size - len(received))
            if not chunk:
                raise ProtocolError("Connection broken while transmitting a block")
            received += chunk
        return received

    @staticmethod
    def receive(sock):
        """Receive one block from the socket and return it as a USPBlock.

        Raises ProtocolError if the connection is closed before the block is
        complete or if a subblock announces a size above the accepted limit.
        """

        # unpack() always returns a tuple; take the single integer out of it
        # so that block_type is comparable with the integer constants.
        block_type, = unpack("!H", USPBlock._recv_exact(sock, 2))
        block = USPBlock(block_type)

        chunks = []
        last = False
        while not last:
            subheader, = unpack("!H", USPBlock._recv_exact(sock, 2))
            last = (subheader & 0x8000) != 0
            # The length in the header includes the two header bytes.
            # NOTE(review): with the 0x0FFF mask the size never exceeds 4093,
            # so the limit check below cannot fire; confirm the real width of
            # the length field against the USP sources before changing either.
            size = (subheader & 0x0FFF) - 2
            if size > USPBlock._MAX_RECV_SUBBLOCK:
                raise ProtocolError("Subblock size is too large")

            chunks.append(USPBlock._recv_exact(sock, size))

        block.buffer = b"".join(chunks)
        return block
| 227 | + |
class RPCClient(object):
    """Connection to a discuss server speaking USP blocks.

    Creating the object opens the TCP connection and immediately sends the
    authentication block; afterwards send() and receive() exchange USPBlock
    objects over the same socket.

    Attributes:
        server: fully qualified name of the server, as returned by
            socket.getfqdn(); used as the host part of the Kerberos
            principal.
        socket: the connected socket.
    """

    def __init__(self, server, port, auth=True, timeout=None):
        """Connect to (server, port) and send the authentication block.

        Parameters:
            server: host name of the discuss server.
            port: TCP port to connect to.
            auth: when true, a Kerberos AP_REQ for the "discuss" service is
                sent; otherwise an empty authenticator is sent.
            timeout: passed to socket.create_connection().

        Any exception raised while authenticating is propagated after the
        socket has been closed.
        """

        self.server = socket.getfqdn(server)
        self.socket = socket.create_connection((server, port), timeout)

        # Do not leave the connection open if the handshake fails.
        try:
            self.send(self._build_auth_block(auth))
        except Exception:
            self.socket.close()
            raise

    def _build_auth_block(self, auth):
        """Build the KRB_TICKET block sent right after connecting."""

        auth_block = USPBlock(constants.KRB_TICKET)
        if not auth:
            # Zero-length authenticator
            auth_block.put_cardinal(0)
            return auth_block

        authenticator = _get_krb5_ap_req("discuss", self.server)

        # Discuss does the same thing for authentication as Moira does: it
        # sends AP_REQ to the server and prays that we do not get MITMed,
        # and that Kerberos will protect us from possible replay attacks on
        # that and what else. In Moira it was disappointing given that
        # GSSAPI exists for ~20 years and Moira was reasonably maintained
        # in general. I'm not judging discuss much, because it did not
        # receive much care since it was originally developed.
        #
        # What fascinates me here is the way discuss decided to improve on
        # the Moira's authentication protocol. Instead of just sending the
        # Kerberos ticket, it represents it as an array of bytes, and then
        # it takes every byte and converts it into a network-order short.
        #
        # My current hypothesis is that this is because USP does not
        # support bytes and sending things as an array of shorts seemed
        # like the easiest way to use the underlying buffer-control
        # routines.
        #
        # You may bemoan the state of computer science, but looking at
        # this, I feel like we became better at protocol design over last
        # 20 years.

        auth_block.put_cardinal(len(authenticator))
        # Iterating over a bytearray yields integers regardless of whether
        # the authenticator's own items are one-character strings or ints.
        for byte in bytearray(authenticator):
            auth_block.put_cardinal(byte)
        return auth_block

    def send(self, block):
        """Send a USPBlock to the server."""

        block.send(self.socket)

    def receive(self):
        """Receive the next USPBlock from the server."""

        return USPBlock.receive(self.socket)
| 272 | + |
0 commit comments