ipsilon/util/log.py

   1 # Copyright (C) 2014 Ipsilon Project Contributors
   2 #
   3 # See the file named COPYING for the project license
   4
   5 import cherrypy
   6 import cStringIO
   7 import inspect
   8 import os
   9 import traceback
  10
  11
  12 def log_request_response():
  13     '''Log the contents of the request and subsequent response.
  14
  15     This is run as a tool hook and should be run as the last hook
  16     (on_end_resource) just before the server writes the response to
  17     the client. The tool is registered like this:
  18
  19     cherrypy.tools.log_request_response = \
  20         cherrypy.Tool('on_end_resource', log_request_response)
  21
  22     Then the logging can be enabled or disabled via the config option,
  23     for example to turn the logging on via a config dict add this
  24     key/value pair:
  25
  26     'tools.log_request_response.on': True,
  27
  28     or for a config file add this line:
  29
  30     tools.log_request_response.on = True
  31
  32     At first blush it would seem easy to log the request received and
  33     the response emitted, just hook those locations and output the raw
  34     data. Unfortunately there are no such locations in cherrypy where
  35     you have access to the raw input and output data. The complicating
  36     factors are:
  37
  38     * Input data is consumed by the Request object off a file object
  39       which cannot seek back (i.e. rewind), once the data is read it
  40       cannot be read again. Therefore any attempt to log the raw input
  41       will starve read operations done by the Request object to read
  42       the headers and body.
  43
  44     * Automatic consumption and processing of the body contents is
  45       enabled by default (controlled by the
  46       request.process_request_body config option). You generally do
  47       not want to turn these automatic request processors off because
  48       they provide valuable input processing useful to the request
  49       processing pipeline (e.g. when Content-Type is
  50       application/x-www-form-urlencoded, or multipart/form-data or
  51       multipart). Most cherrypy page handlers expect this
  52       pre-processing to have been performed and the 'cooked' data to
  53       be availabe on the Request object. Thus rather than logging the
  54       raw input HTTP which will have been consumed by the request
  55       processing logic we are forced into logging only the 'cooked'
  56       values available to us after the request has been read and
  57       processed.
  58
  59     * The response body may not be available if response streaming is
  60       enabled. This is because control is passed directly to the
  61       object writing the data to the client bypassing the normal
  62       cherrypy hooks. Fortunately streaming is not recommended and we
  63       can expect it will be disabled. When streaming is disabled the
  64       response body can be composed as:
  65
  66         - string
  67         - list of strings
  68         - a generator yielding chunks of strings
  69
  70       When the response body is a string or list of strings logging
  71       the body and then passing down the pipeline to be written to the
  72       client is trivial. However when part of the body is produced by
  73       a generator we must run the generator to produce that part of
  74       the body and store it as a string. This is an issue equivalent
  75       to not being able to re-read a file object as seen in the input
  76       situation. Once the generator has run it cannot be run
  77       again. Therefore we consume all the body output, store it in a
  78       string, log it and then replace the request body contents with
  79       the body string we formed. It's this body string which is
  80       subsequently sent down the processing pipeline to be written to
  81       the client.
  82
  83     '''
  84
  85     # --- Begin local functions ---
  86
  87     def indent_text(text, level=0, indent='    '):
  88         '''
  89         Input is a block of text potentially containing newlines which
  90         seperate the text into a sequence of lines. The text block is
  91         split into individual lines and indented according to the
  92         indentation level. The width of the indent is controlled by
  93         the optional indent parameter.
  94
  95         The result is a single block of text where each of the
  96         original lines of text are indented.
  97         '''
  98
  99         f = cStringIO.StringIO()
 100
 101         lines = text.split('\n')
 102
 103         # Do not output trailing newline
 104         if lines and lines[-1] == '':
 105             lines.pop(-1)
 106
 107         for line in lines:
 108             f.write(indent*level)
 109             f.write(line)
 110             f.write('\n')
 111
 112         string = f.getvalue()
 113         f.close()
 114         return string
 115
 116     def print_part(part):
 117         '''
 118         Format a cherrypy._cpreqbody.Part object into a string.
 119
 120         When the request Content-Type is a multipart cherrypy splits
 121         each part of the multipart into a Part object containing
 122         information about the part and it's content.
 123         '''
 124         f = cStringIO.StringIO()
 125
 126         f.write(indent_text('Name = %s\n' % part.name))
 127         if part.headers:
 128             f.write(indent_text('Headers:\n'))
 129             for name, value in part.headers.items():
 130                 f.write(indent_text('%s: %s\n' % (name, value), 1))
 131
 132         f.write(indent_text("Body:\n"))
 133         f.write(indent_text(part.fullvalue(), 1))
 134
 135         string = f.getvalue()
 136         f.close()
 137         return string
 138
 139     def print_param(name, value):
 140         f = cStringIO.StringIO()
 141
 142         # Might be a multipart Part object, if so format it
 143         if isinstance(value, cherrypy._cpreqbody.Part):  # pylint:disable=W0212
 144             f.write(indent_text("%s:\n" % (name)))
 145             f.write(indent_text(print_part(value), 1))
 146         else:
 147             # Not a mulitpart, just write it as a string
 148             f.write(indent_text("%s: %s\n" % (name, value)))
 149
 150         string = f.getvalue()
 151         f.close()
 152         return string
 153
 154     def collapse_body(body):
 155         '''The cherrypy response body can be:
 156
 157         * string
 158         * list of strings
 159         * generator yielding a string
 160
 161         Generators are typically used for file contents but any
 162         cherrypy response is permitted to use a generator to provide
 163         the body of the response.
 164
 165         Strings and lists of strings are immediately available and
 166         stored in the request object. During normal cherrypy
 167         processing when writing the response to the client response
 168         data which is provided by a generator will be iterated over
 169         and written to the client. In order for us to be able to log
 170         all the response data prior to it being sent to the client we
 171         must also iterate over the generator provided content, however
 172         this exhausts the generator making it unavailable to be
 173         written to the client.
 174
 175         To solve this problem we collect all the response data. Now we
 176         have the full body contents, we can log it and then set this
 177         as the new body contents for remainder of the processing
 178         pipeline to act upon (i.e. sent to the client)
 179         '''
 180         f = cStringIO.StringIO()
 181
 182         for chunk in body:
 183             f.write(chunk)
 184
 185         string = f.getvalue()
 186         f.close()
 187         return string
 188
 189     # --- End local functions ---
 190
 191     f = cStringIO.StringIO()
 192     request = cherrypy.serving.request
 193     remote = request.remote
 194
 195     #
 196     # Log the Request
 197     #
 198     f.write(indent_text("<Request> [%s] %s\n" %
 199                         (remote.name or remote.ip, request.request_line), 0))
 200
 201     # Request Headers
 202     if request.headers:
 203         f.write(indent_text("Headers:\n", 1))
 204         for name, value in request.headers.items():
 205             f.write(indent_text("%s: %s\n" % (name, value), 2))
 206
 207     # Request parameters from URL query string and
 208     # x-www-form-urlencoded POST data
 209     if request.body.params:
 210         f.write(indent_text("Params:\n", 1))
 211         for name, value in request.body.params.items():
 212             # Multi-valued paramater is in a list
 213             if isinstance(value, list):
 214                 for i, item in enumerate(value):
 215                     f.write(indent_text(print_param("%s[%d]" % (name, i),
 216                                                     item), 2))
 217             else:
 218                 f.write(indent_text(print_param(name, value), 2))
 219
 220     # If the body is multipart format each of the parts
 221     if request.body.parts:
 222         f.write(indent_text("Body Parts:\n"))
 223         for i, part in enumerate(request.body.parts):
 224             f.write(indent_text("Part %s name=%s:\n" % (i, part.name), 3))
 225             f.write(indent_text(print_part(part), 4))
 226
 227     #
 228     # Log the Response
 229     #
 230     response = cherrypy.response
 231     f.write(indent_text("<Response> %s\n" % response.status, 0))
 232
 233     # Log the response headers
 234     if response.header_list:
 235         f.write(indent_text("Headers:\n", 1))
 236         for name, value in response.header_list:
 237             f.write(indent_text("%s: %s\n" % (name, value), 2))
 238
 239     # Log the response body
 240     #
 241     # We can only do this if the response is not streaming because we have
 242     # no way to hook the streaming content.
 243     f.write(indent_text("Body:\n", 1))
 244
 245     if response.stream:
 246         f.write(indent_text("body omitted because response is streaming\n", 2))
 247     else:
 248         response.body = collapse_body(response.body)
 249         for chunk in response.body:
 250             f.write(indent_text(chunk, 2))
 251
 252     string = f.getvalue()
 253     f.close()
 254     print string
 255
 256 cherrypy.tools.log_request_response = cherrypy.Tool('on_end_resource',
 257                                                     log_request_response)
 258
 259
 260 class Log(object):
 261
 262     @staticmethod
 263     def stacktrace():
 264         buf = cStringIO.StringIO()
 265
 266         stack = traceback.extract_stack()
 267         traceback.print_list(stack[:-2], file=buf)
 268
 269         stacktrace_string = buf.getvalue()
 270         buf.close()
 271         return stacktrace_string
 272
 273     @staticmethod
 274     def get_class_from_frame(frame_obj):
 275         '''
 276         Taken from:
 277         http://stackoverflow.com/questions/2203424/
 278         python-how-to-retrieve-class-information-from-a-frame-object
 279
 280         At the frame object level, there does not seem to be any way
 281         to find the actual python function object that has been
 282         called.
 283
 284         However, if your code relies on the common convention of naming
 285         the instance parameter of a method self, then you could do this.
 286         '''
 287
 288         args, _, _, value_dict = inspect.getargvalues(frame_obj)
 289         # Is the functions first parameter named 'self'?
 290         if len(args) and args[0] == 'self':
 291             # in that case, 'self' will be referenced in value_dict
 292             instance = value_dict.get('self', None)
 293             if instance:
 294                 # return its class
 295                 return getattr(instance, '__class__', None)
 296         # return None otherwise
 297         return None
 298
 299     @staticmethod
 300     def call_location():
 301         frame = inspect.stack()[2]
 302         frame_obj = frame[0]
 303         filename = frame[1]
 304         line_number = frame[2]
 305         func = frame[3]
 306
 307         # Only report the last 3 components of the path
 308         filename = os.sep.join(filename.split(os.sep)[-3:])
 309
 310         cls = Log.get_class_from_frame(frame_obj)
 311         if cls:
 312             location = '%s:%s %s.%s()' %  \
 313                        (filename, line_number, cls.__name__, func)
 314         else:
 315             location = '%s:%s %s()' % (filename, line_number, func)
 316         return location
 317
 318     def debug(self, fact):
 319         if cherrypy.config.get('debug', False):
 320             location = Log.call_location()
 321             cherrypy.log('DEBUG(%s): %s' % (location, fact))
 322
 323     # for compatibility with existing code
 324     _debug = debug
 325
 326     def log(self, fact):
 327         cherrypy.log(fact)
 328
 329     def error(self, fact):
 330         cherrypy.log.error('ERROR: %s' % fact)
 331         if cherrypy.config.get('stacktrace_on_error', False):
 332             cherrypy.log.error(Log.stacktrace())