ipsilon/util/log.py

   1 # Copyright (C) 2014 Ipsilon Project Contributors
   2 #
   3 # See the file named COPYING for the project license
   4
   5 import cherrypy
   6 import cStringIO
   7 import inspect
   8 import os
   9 import traceback
  10 import logging
  11
  12
  13 def log_request_response():
  14     '''Log the contents of the request and subsequent response.
  15
  16     This is run as a tool hook and should be run as the last hook
  17     (on_end_resource) just before the server writes the response to
  18     the client. The tool is registered like this:
  19
  20     cherrypy.tools.log_request_response = \
  21         cherrypy.Tool('on_end_resource', log_request_response)
  22
  23     Then the logging can be enabled or disabled via the config option,
  24     for example to turn the logging on via a config dict add this
  25     key/value pair:
  26
  27     'tools.log_request_response.on': True,
  28
  29     or for a config file add this line:
  30
  31     tools.log_request_response.on = True
  32
  33     At first blush it would seem easy to log the request received and
  34     the response emitted, just hook those locations and output the raw
  35     data. Unfortunately there are no such locations in cherrypy where
  36     you have access to the raw input and output data. The complicating
  37     factors are:
  38
  39     * Input data is consumed by the Request object off a file object
  40       which cannot seek back (i.e. rewind), once the data is read it
  41       cannot be read again. Therefore any attempt to log the raw input
  42       will starve read operations done by the Request object to read
  43       the headers and body.
  44
  45     * Automatic consumption and processing of the body contents is
  46       enabled by default (controlled by the
  47       request.process_request_body config option). You generally do
  48       not want to turn these automatic request processors off because
  49       they provide valuable input processing useful to the request
  50       processing pipeline (e.g. when Content-Type is
  51       application/x-www-form-urlencoded, or multipart/form-data or
  52       multipart). Most cherrypy page handlers expect this
  53       pre-processing to have been performed and the 'cooked' data to
  54       be availabe on the Request object. Thus rather than logging the
  55       raw input HTTP which will have been consumed by the request
  56       processing logic we are forced into logging only the 'cooked'
  57       values available to us after the request has been read and
  58       processed.
  59
  60     * The response body may not be available if response streaming is
  61       enabled. This is because control is passed directly to the
  62       object writing the data to the client bypassing the normal
  63       cherrypy hooks. Fortunately streaming is not recommended and we
  64       can expect it will be disabled. When streaming is disabled the
  65       response body can be composed as:
  66
  67         - string
  68         - list of strings
  69         - a generator yielding chunks of strings
  70
  71       When the response body is a string or list of strings logging
  72       the body and then passing down the pipeline to be written to the
  73       client is trivial. However when part of the body is produced by
  74       a generator we must run the generator to produce that part of
  75       the body and store it as a string. This is an issue equivalent
  76       to not being able to re-read a file object as seen in the input
  77       situation. Once the generator has run it cannot be run
  78       again. Therefore we consume all the body output, store it in a
  79       string, log it and then replace the request body contents with
  80       the body string we formed. It's this body string which is
  81       subsequently sent down the processing pipeline to be written to
  82       the client.
  83
  84     '''
  85
  86     # --- Begin local functions ---
  87
  88     def indent_text(text, level=0, indent='    '):
  89         '''
  90         Input is a block of text potentially containing newlines which
  91         seperate the text into a sequence of lines. The text block is
  92         split into individual lines and indented according to the
  93         indentation level. The width of the indent is controlled by
  94         the optional indent parameter.
  95
  96         The result is a single block of text where each of the
  97         original lines of text are indented.
  98         '''
  99
 100         f = cStringIO.StringIO()
 101
 102         lines = text.split('\n')
 103
 104         # Do not output trailing newline
 105         if lines and lines[-1] == '':
 106             lines.pop(-1)
 107
 108         for line in lines:
 109             f.write(indent*level)
 110             f.write(line)
 111             f.write('\n')
 112
 113         string = f.getvalue()
 114         f.close()
 115         return string
 116
 117     def print_part(part):
 118         '''
 119         Format a cherrypy._cpreqbody.Part object into a string.
 120
 121         When the request Content-Type is a multipart cherrypy splits
 122         each part of the multipart into a Part object containing
 123         information about the part and it's content.
 124         '''
 125         f = cStringIO.StringIO()
 126
 127         f.write(indent_text('Name = %s\n' % part.name))
 128         if part.headers:
 129             f.write(indent_text('Headers:\n'))
 130             for name, value in part.headers.items():
 131                 f.write(indent_text('%s: %s\n' % (name, value), 1))
 132
 133         f.write(indent_text("Body:\n"))
 134         f.write(indent_text(part.fullvalue(), 1))
 135
 136         string = f.getvalue()
 137         f.close()
 138         return string
 139
 140     def print_param(name, value):
 141         f = cStringIO.StringIO()
 142
 143         # Might be a multipart Part object, if so format it
 144         if isinstance(value, cherrypy._cpreqbody.Part):  # pylint:disable=W0212
 145             f.write(indent_text("%s:\n" % (name)))
 146             f.write(indent_text(print_part(value), 1))
 147         else:
 148             # Not a mulitpart, just write it as a string
 149             f.write(indent_text("%s: %s\n" % (name, value)))
 150
 151         string = f.getvalue()
 152         f.close()
 153         return string
 154
 155     def collapse_body(body):
 156         '''The cherrypy response body can be:
 157
 158         * string
 159         * list of strings
 160         * generator yielding a string
 161
 162         Generators are typically used for file contents but any
 163         cherrypy response is permitted to use a generator to provide
 164         the body of the response.
 165
 166         Strings and lists of strings are immediately available and
 167         stored in the request object. During normal cherrypy
 168         processing when writing the response to the client response
 169         data which is provided by a generator will be iterated over
 170         and written to the client. In order for us to be able to log
 171         all the response data prior to it being sent to the client we
 172         must also iterate over the generator provided content, however
 173         this exhausts the generator making it unavailable to be
 174         written to the client.
 175
 176         To solve this problem we collect all the response data. Now we
 177         have the full body contents, we can log it and then set this
 178         as the new body contents for remainder of the processing
 179         pipeline to act upon (i.e. sent to the client)
 180         '''
 181         f = cStringIO.StringIO()
 182
 183         for chunk in body:
 184             f.write(chunk)
 185
 186         string = f.getvalue()
 187         f.close()
 188         return string
 189
 190     # --- End local functions ---
 191
 192     f = cStringIO.StringIO()
 193     request = cherrypy.serving.request
 194     remote = request.remote
 195
 196     #
 197     # Log the Request
 198     #
 199     f.write(indent_text("<Request> [%s] %s\n" %
 200                         (remote.name or remote.ip, request.request_line), 0))
 201
 202     # Request Headers
 203     if request.headers:
 204         f.write(indent_text("Headers:\n", 1))
 205         for name, value in request.headers.items():
 206             f.write(indent_text("%s: %s\n" % (name, value), 2))
 207
 208     # Request parameters from URL query string and
 209     # x-www-form-urlencoded POST data
 210     if request.body.params:
 211         f.write(indent_text("Params:\n", 1))
 212         for name, value in request.body.params.items():
 213             # Multi-valued paramater is in a list
 214             if isinstance(value, list):
 215                 for i, item in enumerate(value):
 216                     f.write(indent_text(print_param("%s[%d]" % (name, i),
 217                                                     item), 2))
 218             else:
 219                 f.write(indent_text(print_param(name, value), 2))
 220
 221     # If the body is multipart format each of the parts
 222     if request.body.parts:
 223         f.write(indent_text("Body Parts:\n"))
 224         for i, part in enumerate(request.body.parts):
 225             f.write(indent_text("Part %s name=%s:\n" % (i, part.name), 3))
 226             f.write(indent_text(print_part(part), 4))
 227
 228     #
 229     # Log the Response
 230     #
 231     response = cherrypy.response
 232     f.write(indent_text("<Response> %s\n" % response.status, 0))
 233
 234     # Log the response headers
 235     if response.header_list:
 236         f.write(indent_text("Headers:\n", 1))
 237         for name, value in response.header_list:
 238             f.write(indent_text("%s: %s\n" % (name, value), 2))
 239
 240     # Log the response body
 241     #
 242     # We can only do this if the response is not streaming because we have
 243     # no way to hook the streaming content.
 244     f.write(indent_text("Body:\n", 1))
 245
 246     if response.stream:
 247         f.write(indent_text("body omitted because response is streaming\n", 2))
 248     else:
 249         response.body = collapse_body(response.body)
 250         for chunk in response.body:
 251             f.write(indent_text(chunk, 2))
 252
 253     string = f.getvalue()
 254     f.close()
 255     print string
 256
 257 cherrypy.tools.log_request_response = cherrypy.Tool('on_end_resource',
 258                                                     log_request_response)
 259
 260
 261 class Log(object):
 262
 263     @staticmethod
 264     def stacktrace():
 265         buf = cStringIO.StringIO()
 266
 267         stack = traceback.extract_stack()
 268         traceback.print_list(stack[:-2], file=buf)
 269
 270         stacktrace_string = buf.getvalue()
 271         buf.close()
 272         return stacktrace_string
 273
 274     @staticmethod
 275     def get_class_from_frame(frame_obj):
 276         '''
 277         Taken from:
 278         http://stackoverflow.com/questions/2203424/
 279         python-how-to-retrieve-class-information-from-a-frame-object
 280
 281         At the frame object level, there does not seem to be any way
 282         to find the actual python function object that has been
 283         called.
 284
 285         However, if your code relies on the common convention of naming
 286         the instance parameter of a method self, then you could do this.
 287         '''
 288
 289         args, _, _, value_dict = inspect.getargvalues(frame_obj)
 290         # Is the functions first parameter named 'self'?
 291         if len(args) and args[0] == 'self':
 292             # in that case, 'self' will be referenced in value_dict
 293             instance = value_dict.get('self', None)
 294             if instance:
 295                 # return its class
 296                 return getattr(instance, '__class__', None)
 297         # return None otherwise
 298         return None
 299
 300     @staticmethod
 301     def call_location():
 302         frame = inspect.stack()[2]
 303         frame_obj = frame[0]
 304         filename = frame[1]
 305         line_number = frame[2]
 306         func = frame[3]
 307
 308         # Only report the last 3 components of the path
 309         filename = os.sep.join(filename.split(os.sep)[-3:])
 310
 311         cls = Log.get_class_from_frame(frame_obj)
 312         if cls:
 313             location = '%s:%s %s.%s()' %  \
 314                        (filename, line_number, cls.__name__, func)
 315         else:
 316             location = '%s:%s %s()' % (filename, line_number, func)
 317         return location
 318
 319     def debug(self, fact):
 320         if cherrypy.config.get('debug', False):
 321             location = Log.call_location()
 322             cherrypy.log('DEBUG(%s): %s' % (location, fact))
 323
 324     # for compatibility with existing code
 325     _debug = debug
 326
 327     def log(self, fact):
 328         cherrypy.log(fact)
 329
 330     def error(self, fact):
 331         cherrypy.log.error('ERROR: %s' % fact, severity=logging.ERROR)
 332         if cherrypy.config.get('stacktrace_on_error', False):
 333             cherrypy.log.error(Log.stacktrace())