ipsilon/util/log.py

   1 # Copyright (C) 2014 Ipsilon project Contributors, for license see COPYING
   2
   3 import cherrypy
   4 import cStringIO
   5 import inspect
   6 import os
   7 import traceback
   8 import logging
   9
  10
  11 def log_request_response():
  12     '''Log the contents of the request and subsequent response.
  13
  14     This is run as a tool hook and should be run as the last hook
  15     (on_end_resource) just before the server writes the response to
  16     the client. The tool is registered like this:
  17
  18     cherrypy.tools.log_request_response = \
  19         cherrypy.Tool('on_end_resource', log_request_response)
  20
  21     Then the logging can be enabled or disabled via the config option,
  22     for example to turn the logging on via a config dict add this
  23     key/value pair:
  24
  25     'tools.log_request_response.on': True,
  26
  27     or for a config file add this line:
  28
  29     tools.log_request_response.on = True
  30
  31     At first blush it would seem easy to log the request received and
  32     the response emitted, just hook those locations and output the raw
  33     data. Unfortunately there are no such locations in cherrypy where
  34     you have access to the raw input and output data. The complicating
  35     factors are:
  36
  37     * Input data is consumed by the Request object off a file object
  38       which cannot seek back (i.e. rewind), once the data is read it
  39       cannot be read again. Therefore any attempt to log the raw input
  40       will starve read operations done by the Request object to read
  41       the headers and body.
  42
  43     * Automatic consumption and processing of the body contents is
  44       enabled by default (controlled by the
  45       request.process_request_body config option). You generally do
  46       not want to turn these automatic request processors off because
  47       they provide valuable input processing useful to the request
  48       processing pipeline (e.g. when Content-Type is
  49       application/x-www-form-urlencoded, or multipart/form-data or
  50       multipart). Most cherrypy page handlers expect this
  51       pre-processing to have been performed and the 'cooked' data to
  52       be availabe on the Request object. Thus rather than logging the
  53       raw input HTTP which will have been consumed by the request
  54       processing logic we are forced into logging only the 'cooked'
  55       values available to us after the request has been read and
  56       processed.
  57
  58     * The response body may not be available if response streaming is
  59       enabled. This is because control is passed directly to the
  60       object writing the data to the client bypassing the normal
  61       cherrypy hooks. Fortunately streaming is not recommended and we
  62       can expect it will be disabled. When streaming is disabled the
  63       response body can be composed as:
  64
  65         - string
  66         - list of strings
  67         - a generator yielding chunks of strings
  68
  69       When the response body is a string or list of strings logging
  70       the body and then passing down the pipeline to be written to the
  71       client is trivial. However when part of the body is produced by
  72       a generator we must run the generator to produce that part of
  73       the body and store it as a string. This is an issue equivalent
  74       to not being able to re-read a file object as seen in the input
  75       situation. Once the generator has run it cannot be run
  76       again. Therefore we consume all the body output, store it in a
  77       string, log it and then replace the request body contents with
  78       the body string we formed. It's this body string which is
  79       subsequently sent down the processing pipeline to be written to
  80       the client.
  81
  82     '''
  83
  84     # --- Begin local functions ---
  85
  86     def indent_text(text, level=0, indent='    '):
  87         '''
  88         Input is a block of text potentially containing newlines which
  89         seperate the text into a sequence of lines. The text block is
  90         split into individual lines and indented according to the
  91         indentation level. The width of the indent is controlled by
  92         the optional indent parameter.
  93
  94         The result is a single block of text where each of the
  95         original lines of text are indented.
  96         '''
  97
  98         f = cStringIO.StringIO()
  99
 100         lines = text.split('\n')
 101
 102         # Do not output trailing newline
 103         if lines and lines[-1] == '':
 104             lines.pop(-1)
 105
 106         for line in lines:
 107             f.write(indent*level)
 108             f.write(line)
 109             f.write('\n')
 110
 111         string = f.getvalue()
 112         f.close()
 113         return string
 114
 115     def print_part(part):
 116         '''
 117         Format a cherrypy._cpreqbody.Part object into a string.
 118
 119         When the request Content-Type is a multipart cherrypy splits
 120         each part of the multipart into a Part object containing
 121         information about the part and it's content.
 122         '''
 123         f = cStringIO.StringIO()
 124
 125         f.write(indent_text('Name = %s\n' % part.name))
 126         if part.headers:
 127             f.write(indent_text('Headers:\n'))
 128             for name, value in part.headers.items():
 129                 f.write(indent_text('%s: %s\n' % (name, value), 1))
 130
 131         f.write(indent_text("Body:\n"))
 132         f.write(indent_text(part.fullvalue(), 1))
 133
 134         string = f.getvalue()
 135         f.close()
 136         return string
 137
 138     def print_param(name, value):
 139         f = cStringIO.StringIO()
 140
 141         # Might be a multipart Part object, if so format it
 142         if isinstance(value, cherrypy._cpreqbody.Part):  # pylint:disable=W0212
 143             f.write(indent_text("%s:\n" % (name)))
 144             f.write(indent_text(print_part(value), 1))
 145         else:
 146             # Not a mulitpart, just write it as a string
 147             f.write(indent_text("%s: %s\n" % (name, value)))
 148
 149         string = f.getvalue()
 150         f.close()
 151         return string
 152
 153     def collapse_body(body):
 154         '''The cherrypy response body can be:
 155
 156         * string
 157         * list of strings
 158         * generator yielding a string
 159
 160         Generators are typically used for file contents but any
 161         cherrypy response is permitted to use a generator to provide
 162         the body of the response.
 163
 164         Strings and lists of strings are immediately available and
 165         stored in the request object. During normal cherrypy
 166         processing when writing the response to the client response
 167         data which is provided by a generator will be iterated over
 168         and written to the client. In order for us to be able to log
 169         all the response data prior to it being sent to the client we
 170         must also iterate over the generator provided content, however
 171         this exhausts the generator making it unavailable to be
 172         written to the client.
 173
 174         To solve this problem we collect all the response data. Now we
 175         have the full body contents, we can log it and then set this
 176         as the new body contents for remainder of the processing
 177         pipeline to act upon (i.e. sent to the client)
 178         '''
 179         f = cStringIO.StringIO()
 180
 181         for chunk in body:
 182             f.write(chunk)
 183
 184         string = f.getvalue()
 185         f.close()
 186         return string
 187
 188     # --- End local functions ---
 189
 190     f = cStringIO.StringIO()
 191     request = cherrypy.serving.request
 192     remote = request.remote
 193
 194     #
 195     # Log the Request
 196     #
 197     f.write(indent_text("<Request> [%s] %s\n" %
 198                         (remote.name or remote.ip, request.request_line), 0))
 199
 200     # Request Headers
 201     if request.headers:
 202         f.write(indent_text("Headers:\n", 1))
 203         for name, value in request.headers.items():
 204             f.write(indent_text("%s: %s\n" % (name, value), 2))
 205
 206     # Request parameters from URL query string and
 207     # x-www-form-urlencoded POST data
 208     if request.body.params:
 209         f.write(indent_text("Params:\n", 1))
 210         for name, value in request.body.params.items():
 211             # Multi-valued paramater is in a list
 212             if isinstance(value, list):
 213                 for i, item in enumerate(value):
 214                     f.write(indent_text(print_param("%s[%d]" % (name, i),
 215                                                     item), 2))
 216             else:
 217                 f.write(indent_text(print_param(name, value), 2))
 218
 219     # If the body is multipart format each of the parts
 220     if request.body.parts:
 221         f.write(indent_text("Body Parts:\n"))
 222         for i, part in enumerate(request.body.parts):
 223             f.write(indent_text("Part %s name=%s:\n" % (i, part.name), 3))
 224             f.write(indent_text(print_part(part), 4))
 225
 226     #
 227     # Log the Response
 228     #
 229     response = cherrypy.response
 230     f.write(indent_text("<Response> %s\n" % response.status, 0))
 231
 232     # Log the response headers
 233     if response.header_list:
 234         f.write(indent_text("Headers:\n", 1))
 235         for name, value in response.header_list:
 236             f.write(indent_text("%s: %s\n" % (name, value), 2))
 237
 238     # Log the response body
 239     #
 240     # We can only do this if the response is not streaming because we have
 241     # no way to hook the streaming content.
 242     f.write(indent_text("Body:\n", 1))
 243
 244     if response.stream:
 245         f.write(indent_text("body omitted because response is streaming\n", 2))
 246     else:
 247         response.body = collapse_body(response.body)
 248         for chunk in response.body:
 249             f.write(indent_text(chunk, 2))
 250
 251     string = f.getvalue()
 252     f.close()
 253     print string
 254
 255 cherrypy.tools.log_request_response = cherrypy.Tool('on_end_resource',
 256                                                     log_request_response)
 257
 258
 259 class Log(object):
 260
 261     @staticmethod
 262     def stacktrace():
 263         buf = cStringIO.StringIO()
 264
 265         stack = traceback.extract_stack()
 266         traceback.print_list(stack[:-2], file=buf)
 267
 268         stacktrace_string = buf.getvalue()
 269         buf.close()
 270         return stacktrace_string
 271
 272     @staticmethod
 273     def get_class_from_frame(frame_obj):
 274         '''
 275         Taken from:
 276         http://stackoverflow.com/questions/2203424/
 277         python-how-to-retrieve-class-information-from-a-frame-object
 278
 279         At the frame object level, there does not seem to be any way
 280         to find the actual python function object that has been
 281         called.
 282
 283         However, if your code relies on the common convention of naming
 284         the instance parameter of a method self, then you could do this.
 285         '''
 286
 287         args, _, _, value_dict = inspect.getargvalues(frame_obj)
 288         # Is the functions first parameter named 'self'?
 289         if len(args) and args[0] == 'self':
 290             # in that case, 'self' will be referenced in value_dict
 291             instance = value_dict.get('self', None)
 292             if instance:
 293                 # return its class
 294                 return getattr(instance, '__class__', None)
 295         # return None otherwise
 296         return None
 297
 298     @staticmethod
 299     def call_location():
 300         """Return string describing where in the code a logging call was made.
 301
 302         When reading a log file it is very helpful to know where in the
 303         code the message was emitted from.
 304
 305         Returns a string of the form:
 306         "filename:line_number class"
 307
 308         The filename is a path truncated to the last 3 pathname
 309         components to give just enough context without being too verbose.
 310
 311         This function walks up the stack bypassing any functions that
 312         appear to be part of the logging operation itself. The first
 313         such frame not part of logging is assumed to be the location
 314         of the code that emitted the log message.
 315
 316         :return: string describing code location of logging call
 317         """
 318
 319         # Each frame is a 6-tuple:
 320         #
 321         # the frame object,
 322         # the filename,
 323         # the line number of the current line,
 324         # the function name,
 325         # a list of lines of context from the source code,
 326         # the index of the current line within that list
 327
 328         # Walk up the stack until we find a function name which is not
 329         # a logging function, that frame is the logging caller and hence
 330         # the location where the call to logging was made.
 331         try:
 332             frame = None
 333             frame_obj = None
 334             stack = inspect.stack()
 335             for frame in stack:
 336                 if frame[3] not in ('call_location', 'log', 'debug', 'error'):
 337                     break
 338
 339             if not frame:
 340                 frame = stack[0]
 341
 342             frame_obj = frame[0]
 343             filename = frame[1]
 344             line_number = frame[2]
 345             func = frame[3]
 346
 347             # Only report the last 3 components of the path
 348             filename = os.sep.join(filename.split(os.sep)[-3:])
 349
 350             cls = Log.get_class_from_frame(frame_obj)
 351         finally:
 352             # Don't keep reference to frame object
 353             del frame_obj
 354             del stack
 355
 356         if cls:
 357             location = '%s:%s %s.%s()' %  \
 358                        (filename, line_number, cls.__name__, func)
 359         else:
 360             location = '%s:%s %s()' % (filename, line_number, func)
 361         return location
 362
 363     def debug(self, fact):
 364         if cherrypy.config.get('debug', False):
 365             location = Log.call_location()
 366             cherrypy.log('DEBUG(%s): %s' % (location, fact))
 367
 368     # for compatibility with existing code
 369     _debug = debug
 370
 371     def log(self, fact):
 372         cherrypy.log(fact)
 373
 374     def error(self, fact):
 375         cherrypy.log.error('ERROR: %s' % fact, severity=logging.ERROR)
 376         if cherrypy.config.get('stacktrace_on_error', False):
 377             cherrypy.log.error(Log.stacktrace())