ipsilon/util/log.py

   1 # Copyright (C) 2014 Ipsilon Project Contributors
   2 #
   3 # See the file named COPYING for the project license
   4
   5 import cherrypy
   6 import cStringIO
   7 import inspect
   8 import os
   9 import traceback
  10
  11 def log_request_response():
  12     '''Log the contents of the request and subsequent response.
  13
  14     This is run as a tool hook and should be run as the last hook
  15     (on_end_resource) just before the server writes the response to
  16     the client. The tool is registered like this:
  17
  18     cherrypy.tools.log_request_response = \
  19         cherrypy.Tool('on_end_resource', log_request_response)
  20
  21     Then the logging can be enabled or disabled via the config option,
  22     for example to turn the logging on via a config dict add this
  23     key/value pair:
  24
  25     'tools.log_request_response.on': True,
  26
  27     or for a config file add this line:
  28
  29     tools.log_request_response.on = True
  30
  31     At first blush it would seem easy to log the request received and
  32     the response emitted, just hook those locations and output the raw
  33     data. Unfortunately there are no such locations in cherrypy where
  34     you have access to the raw input and output data. The complicating
  35     factors are:
  36
  37     * Input data is consumed by the Request object off a file object
  38       which cannot seek back (i.e. rewind), once the data is read it
  39       cannot be read again. Therefore any attempt to log the raw input
  40       will starve read operations done by the Request object to read
  41       the headers and body.
  42
  43     * Automatic consumption and processing of the body contents is
  44       enabled by default (controlled by the
  45       request.process_request_body config option). You generally do
  46       not want to turn these automatic request processors off because
  47       they provide valuable input processing useful to the request
  48       processing pipeline (e.g. when Content-Type is
  49       application/x-www-form-urlencoded, or multipart/form-data or
  50       multipart). Most cherrypy page handlers expect this
  51       pre-processing to have been performed and the 'cooked' data to
  52       be availabe on the Request object. Thus rather than logging the
  53       raw input HTTP which will have been consumed by the request
  54       processing logic we are forced into logging only the 'cooked'
  55       values available to us after the request has been read and
  56       processed.
  57
  58     * The response body may not be available if response streaming is
  59       enabled. This is because control is passed directly to the
  60       object writing the data to the client bypassing the normal
  61       cherrypy hooks. Fortunately streaming is not recommended and we
  62       can expect it will be disabled. When streaming is disabled the
  63       response body can be composed as:
  64
  65         - string
  66         - list of strings
  67         - a generator yielding chunks of strings
  68
  69       When the response body is a string or list of strings logging
  70       the body and then passing down the pipeline to be written to the
  71       client is trivial. However when part of the body is produced by
  72       a generator we must run the generator to produce that part of
  73       the body and store it as a string. This is an issue equivalent
  74       to not being able to re-read a file object as seen in the input
  75       situation. Once the generator has run it cannot be run
  76       again. Therefore we consume all the body output, store it in a
  77       string, log it and then replace the request body contents with
  78       the body string we formed. It's this body string which is
  79       subsequently sent down the processing pipeline to be written to
  80       the client.
  81
  82     '''
  83
  84     #--- Begin local functions ---
  85
  86     def indent_text(text, level=0, indent='    '):
  87         '''
  88         Input is a block of text potentially containing newlines which
  89         seperate the text into a sequence of lines. The text block is
  90         split into individual lines and indented according to the
  91         indentation level. The width of the indent is controlled by
  92         the optional indent parameter.
  93
  94         The result is a single block of text where each of the
  95         original lines of text are indented.
  96         '''
  97
  98         f = cStringIO.StringIO()
  99
 100         lines = text.split('\n')
 101
 102         # Do not output trailing newline
 103         if lines and lines[-1] == '':
 104             lines.pop(-1)
 105
 106         for line in lines:
 107             f.write(indent*level)
 108             f.write(line)
 109             f.write('\n')
 110
 111         string = f.getvalue()
 112         f.close()
 113         return string
 114
 115     def print_part(part):
 116         '''
 117         Format a cherrypy._cpreqbody.Part object into a string.
 118
 119         When the request Content-Type is a multipart cherrypy splits
 120         each part of the multipart into a Part object containing
 121         information about the part and it's content.
 122         '''
 123         f = cStringIO.StringIO()
 124
 125         f.write(indent_text('Name = %s\n' % part.name))
 126         if part.headers:
 127             f.write(indent_text('Headers:\n'))
 128             for name, value in part.headers.items():
 129                 f.write(indent_text('%s: %s\n' % (name, value), 1))
 130
 131         f.write(indent_text("Body:\n"))
 132         f.write(indent_text(part.fullvalue(), 1))
 133
 134         string = f.getvalue()
 135         f.close()
 136         return string
 137
 138     def collapse_body(body):
 139         '''The cherrypy response body can be:
 140
 141         * string
 142         * list of strings
 143         * generator yielding a string
 144
 145         Generators are typically used for file contents but any
 146         cherrypy response is permitted to use a generator to provide
 147         the body of the response.
 148
 149         Strings and lists of strings are immediately available and
 150         stored in the request object. During normal cherrypy
 151         processing when writing the response to the client response
 152         data which is provided by a generator will be iterated over
 153         and written to the client. In order for us to be able to log
 154         all the response data prior to it being sent to the client we
 155         must also iterate over the generator provided content, however
 156         this exhausts the generator making it unavailable to be
 157         written to the client.
 158
 159         To solve this problem we collect all the response data. Now we
 160         have the full body contents, we can log it and then set this
 161         as the new body contents for remainder of the processing
 162         pipeline to act upon (i.e. sent to the client)
 163         '''
 164         f = cStringIO.StringIO()
 165
 166         for chunk in body:
 167             f.write(chunk)
 168
 169         string = f.getvalue()
 170         f.close()
 171         return string
 172
 173     #--- End local functions ---
 174
 175     f = cStringIO.StringIO()
 176     request = cherrypy.serving.request
 177     remote = request.remote
 178
 179     #
 180     # Log the Request
 181     #
 182     f.write(indent_text("<Request> [%s] %s\n" % \
 183                       (remote.name or remote.ip, request.request_line), 0))
 184
 185     # Request Headers
 186     if request.headers:
 187         f.write(indent_text("Headers:\n", 1))
 188         for name, value in request.headers.items():
 189             f.write(indent_text("%s: %s\n" % (name, value), 2))
 190
 191     # Request parameters from URL query string and
 192     # x-www-form-urlencoded POST data
 193     if request.body.params:
 194         f.write(indent_text("Params:\n", 1))
 195         for name, value in request.body.params.items():
 196             # Multi-valued paramater is in a list
 197             if isinstance(value, list):
 198                 for i, item in enumerate(value):
 199                     # Might be a multipart Part object, if so format it
 200                     if isinstance(item, cherrypy._cpreqbody.Part):
 201                         f.write(indent_text("%s[%s]:\n" % (name, i), 2))
 202                         f.write(indent_text(print_part(item), 3))
 203                     else:
 204                         # Not a mulitpart, just write it as a string
 205                         f.write(indent_text("%s[%s]: %s\n" % (name, i, item), 2))
 206             else:
 207                 # Just a string value
 208                 f.write(indent_text("%s: %s\n" % (name, value), 2))
 209
 210     # If the body is multipart format each of the parts
 211     if request.body.parts:
 212         f.write(indent_text("Body Parts:\n"))
 213         for i, part in enumerate(request.body.parts):
 214             f.write(indent_text("Part %s name=%s:\n" % (i, part.name), 3))
 215             f.write(indent_text(print_part(part), 4))
 216
 217     #
 218     # Log the Response
 219     #
 220     response = cherrypy.response
 221     f.write(indent_text("<Response> %s\n" % response.status, 0))
 222
 223     # Log the response headers
 224     if response.header_list:
 225         f.write(indent_text("Headers:\n", 1))
 226         for name, value in response.header_list:
 227             f.write(indent_text("%s: %s\n" % (name, value), 2))
 228
 229     # Log the response body
 230     #
 231     # We can only do this if the response is not streaming because we have
 232     # no way to hook the streaming content.
 233     f.write(indent_text("Body:\n", 1))
 234
 235     if response.stream:
 236         f.write(indent_text("body omitted because response is streaming\n", 2))
 237     else:
 238         response.body = collapse_body(response.body)
 239         for chunk in response.body:
 240             f.write(indent_text(chunk, 2))
 241
 242     string = f.getvalue()
 243     f.close()
 244     print string
 245
 246 cherrypy.tools.log_request_response = cherrypy.Tool('on_end_resource', log_request_response)
 247
 248
 249 class Log(object):
 250
 251     @staticmethod
 252     def stacktrace():
 253         buf = cStringIO.StringIO()
 254
 255         stack = traceback.extract_stack()
 256         traceback.print_list(stack[:-2], file=buf)
 257
 258         stacktrace_string = buf.getvalue()
 259         buf.close()
 260         return stacktrace_string
 261
 262     @staticmethod
 263     def get_class_from_frame(frame_obj):
 264         '''
 265         Taken from:
 266         http://stackoverflow.com/questions/2203424/
 267         python-how-to-retrieve-class-information-from-a-frame-object
 268
 269         At the frame object level, there does not seem to be any way
 270         to find the actual python function object that has been
 271         called.
 272
 273         However, if your code relies on the common convention of naming
 274         the instance parameter of a method self, then you could do this.
 275         '''
 276
 277         args, _, _, value_dict = inspect.getargvalues(frame_obj)
 278         # Is the functions first parameter named 'self'?
 279         if len(args) and args[0] == 'self':
 280         # in that case, 'self' will be referenced in value_dict
 281             instance = value_dict.get('self', None)
 282             if instance:
 283                 # return its class
 284                 return getattr(instance, '__class__', None)
 285         # return None otherwise
 286         return None
 287
 288     @staticmethod
 289     def call_location():
 290         frame = inspect.stack()[2]
 291         frame_obj = frame[0]
 292         filename = frame[1]
 293         line_number = frame[2]
 294         func = frame[3]
 295
 296         # Only report the last 3 components of the path
 297         filename = os.sep.join(filename.split(os.sep)[-3:])
 298
 299         cls = Log.get_class_from_frame(frame_obj)
 300         if cls:
 301             location = '%s:%s %s.%s()' %  \
 302                        (filename, line_number, cls.__name__, func)
 303         else:
 304             location = '%s:%s %s()' % (filename, line_number, func)
 305         return location
 306
 307
 308     def debug(self, fact):
 309         if cherrypy.config.get('debug', False):
 310             location = Log.call_location()
 311             cherrypy.log('DEBUG(%s): %s' % (location, fact))
 312
 313     # for compatibility with existing code
 314     _debug = debug
 315
 316     def log(self, fact):
 317         cherrypy.log(fact)
 318
 319     def error(self, fact):
 320         cherrypy.log.error('ERROR: %s' % fact)
 321         if cherrypy.config.get('stacktrace_on_error', False):
 322             cherrypy.log.error(Log.stacktrace())