From e85f190d8e081fbbfeadca24781266d1c3e1bba1 Mon Sep 17 00:00:00 2001
From: John Dennis <jdennis@redhat.com>
Date: Tue, 20 Jan 2015 17:13:34 -0500
Subject: [PATCH] Add request/response logging via cherrypy tool hooks

The ability to easily review the HTTP Ipsilon request and response is
boon for development and issue debugging. Normally these HTTP
conversations occur on SSL/TLS encrypted connections making it
difficult to use other tools to view the traffic. Client side tools
have known pitfalls (e.g. Firebug) and not all conversations are
browser initiated (e.g. SAML ECP). Logging performed by the server
hosting Ipsilon makes logging at the server level server specific
(e.g. Apache's dumpio requires post-processing the log file to extract
and reassamble the HTTP conversation). The best place to log requests
and responses is within Ipsilon using the cherrypy framework
Ipsilon is embedded in. Cherrypy provides user defined hooks that can
be invoked at specific places in the request pipeline. We establish a
hook at the last stage just before the response is written to the
client, it logs the incoming request and outgoing response.

Resolves: https://fedorahosted.org/ipsilon/ticket/44

Signed-off-by: John Dennis <jdennis@redhat.com>
Reviewed-by: Simo Sorce <simo@redhat.com>
---
 examples/ipsilon.conf          |   1 +
 ipsilon/util/log.py            | 238 +++++++++++++++++++++++++++++++++
 templates/install/ipsilon.conf |   1 +
 3 files changed, 240 insertions(+)

diff --git a/examples/ipsilon.conf b/examples/ipsilon.conf
index eddde2f..e89f970 100644
--- a/examples/ipsilon.conf
+++ b/examples/ipsilon.conf
@@ -1,5 +1,6 @@
 [global]
 debug = True
+tools.log_request_response.on = False
 template_dir = "templates"
 
 log.screen = True
diff --git a/ipsilon/util/log.py b/ipsilon/util/log.py
index fb2c620..a010b16 100644
--- a/ipsilon/util/log.py
+++ b/ipsilon/util/log.py
@@ -4,6 +4,244 @@
 
 import cherrypy
 import inspect
+import cStringIO
+
+def log_request_response():
+    '''Log the contents of the request and subsequent response.
+
+    This is run as a tool hook and should be run as the last hook
+    (on_end_resource) just before the server writes the response to
+    the client. The tool is registered like this:
+
+    cherrypy.tools.log_request_response = \
+        cherrypy.Tool('on_end_resource', log_request_response)
+
+    Then the logging can be enabled or disabled via the config option,
+    for example to turn the logging on via a config dict add this
+    key/value pair:
+
+    'tools.log_request_response.on': True,
+
+    or for a config file add this line:
+
+    tools.log_request_response.on = True
+
+    At first blush it would seem easy to log the request received and
+    the response emitted, just hook those locations and output the raw
+    data. Unfortunately there are no such locations in cherrypy where
+    you have access to the raw input and output data. The complicating
+    factors are:
+
+    * Input data is consumed by the Request object off a file object
+      which cannot seek back (i.e. rewind), once the data is read it
+      cannot be read again. Therefore any attempt to log the raw input
+      will starve read operations done by the Request object to read
+      the headers and body.
+
+    * Automatic consumption and processing of the body contents is
+      enabled by default (controlled by the
+      request.process_request_body config option). You generally do
+      not want to turn these automatic request processors off because
+      they provide valuable input processing useful to the request
+      processing pipeline (e.g. when Content-Type is
+      application/x-www-form-urlencoded, or multipart/form-data or
+      multipart). Most cherrypy page handlers expect this
+      pre-processing to have been performed and the 'cooked' data to
+      be availabe on the Request object. Thus rather than logging the
+      raw input HTTP which will have been consumed by the request
+      processing logic we are forced into logging only the 'cooked'
+      values available to us after the request has been read and
+      processed.
+
+    * The response body may not be available if response streaming is
+      enabled. This is because control is passed directly to the
+      object writing the data to the client bypassing the normal
+      cherrypy hooks. Fortunately streaming is not recommended and we
+      can expect it will be disabled. When streaming is disabled the
+      response body can be composed as:
+
+        - string
+        - list of strings
+        - a generator yielding chunks of strings
+
+      When the response body is a string or list of strings logging
+      the body and then passing down the pipeline to be written to the
+      client is trivial. However when part of the body is produced by
+      a generator we must run the generator to produce that part of
+      the body and store it as a string. This is an issue equivalent
+      to not being able to re-read a file object as seen in the input
+      situation. Once the generator has run it cannot be run
+      again. Therefore we consume all the body output, store it in a
+      string, log it and then replace the request body contents with
+      the body string we formed. It's this body string which is
+      subsequently sent down the processing pipeline to be written to
+      the client.
+
+    '''
+
+    #--- Begin local functions ---
+
+    def indent_text(text, level=0, indent='    '):
+        '''
+        Input is a block of text potentially containing newlines which
+        seperate the text into a sequence of lines. The text block is
+        split into individual lines and indented according to the
+        indentation level. The width of the indent is controlled by
+        the optional indent parameter.
+
+        The result is a single block of text where each of the
+        original lines of text are indented.
+        '''
+
+        f = cStringIO.StringIO()
+
+        lines = text.split('\n')
+
+        # Do not output trailing newline
+        if lines and lines[-1] == '':
+            lines.pop(-1)
+
+        for line in lines:
+            f.write(indent*level)
+            f.write(line)
+            f.write('\n')
+
+        string = f.getvalue()
+        f.close()
+        return string
+
+    def print_part(part):
+        '''
+        Format a cherrypy._cpreqbody.Part object into a string.
+
+        When the request Content-Type is a multipart cherrypy splits
+        each part of the multipart into a Part object containing
+        information about the part and it's content.
+        '''
+        f = cStringIO.StringIO()
+
+        f.write(indent_text('Name = %s\n' % part.name))
+        if part.headers:
+            f.write(indent_text('Headers:\n'))
+            for name, value in part.headers.items():
+                f.write(indent_text('%s: %s\n' % (name, value), 1))
+
+        f.write(indent_text("Body:\n"))
+        f.write(indent_text(part.fullvalue(), 1))
+
+        string = f.getvalue()
+        f.close()
+        return string
+
+    def collapse_body(body):
+        '''The cherrypy response body can be:
+
+        * string
+        * list of strings
+        * generator yielding a string
+
+        Generators are typically used for file contents but any
+        cherrypy response is permitted to use a generator to provide
+        the body of the response.
+
+        Strings and lists of strings are immediately available and
+        stored in the request object. During normal cherrypy
+        processing when writing the response to the client response
+        data which is provided by a generator will be iterated over
+        and written to the client. In order for us to be able to log
+        all the response data prior to it being sent to the client we
+        must also iterate over the generator provided content, however
+        this exhausts the generator making it unavailable to be
+        written to the client.
+
+        To solve this problem we collect all the response data. Now we
+        have the full body contents, we can log it and then set this
+        as the new body contents for remainder of the processing
+        pipeline to act upon (i.e. sent to the client)
+        '''
+        f = cStringIO.StringIO()
+
+        for chunk in body:
+            f.write(chunk)
+
+        string = f.getvalue()
+        f.close()
+        return string
+
+    #--- End local functions ---
+
+    f = cStringIO.StringIO()
+    request = cherrypy.serving.request
+    remote = request.remote
+
+    #
+    # Log the Request
+    #
+    f.write(indent_text("<Request> [%s] %s\n" % \
+                      (remote.name or remote.ip, request.request_line), 0))
+
+    # Request Headers
+    if request.headers:
+        f.write(indent_text("Headers:\n", 1))
+        for name, value in request.headers.items():
+            f.write(indent_text("%s: %s\n" % (name, value), 2))
+
+    # Request parameters from URL query string and
+    # x-www-form-urlencoded POST data
+    if request.body.params:
+        f.write(indent_text("Params:\n", 1))
+        for name, value in request.body.params.items():
+            # Multi-valued paramater is in a list
+            if isinstance(value, list):
+                for i, item in enumerate(value):
+                    # Might be a multipart Part object, if so format it
+                    if isinstance(item, cherrypy._cpreqbody.Part):
+                        f.write(indent_text("%s[%s]:\n" % (name, i), 2))
+                        f.write(indent_text(print_part(item), 3))
+                    else:
+                        # Not a mulitpart, just write it as a string
+                        f.write(indent_text("%s[%s]: %s\n" % (name, i, item), 2))
+            else:
+                # Just a string value
+                f.write(indent_text("%s: %s\n" % (name, value), 2))
+
+    # If the body is multipart format each of the parts
+    if request.body.parts:
+        f.write(indent_text("Body Parts:\n"))
+        for i, part in enumerate(request.body.parts):
+            f.write(indent_text("Part %s name=%s:\n" % (i, part.name), 3))
+            f.write(indent_text(print_part(part), 4))
+
+    #
+    # Log the Response
+    #
+    response = cherrypy.response
+    f.write(indent_text("<Response> %s\n" % response.status, 0))
+
+    # Log the response headers
+    if response.header_list:
+        f.write(indent_text("Headers:\n", 1))
+        for name, value in response.header_list:
+            f.write(indent_text("%s: %s\n" % (name, value), 2))
+
+    # Log the response body
+    #
+    # We can only do this if the response is not streaming because we have
+    # no way to hook the streaming content.
+    f.write(indent_text("Body:\n", 1))
+
+    if response.stream:
+        f.write(indent_text("body omitted because response is streaming\n", 2))
+    else:
+        response.body = collapse_body(response.body)
+        for chunk in response.body:
+            f.write(indent_text(chunk, 2))
+
+    string = f.getvalue()
+    f.close()
+    print string
+
+cherrypy.tools.log_request_response = cherrypy.Tool('on_end_resource', log_request_response)
 
 
 class Log(object):
diff --git a/templates/install/ipsilon.conf b/templates/install/ipsilon.conf
index 4b170ca..522cb38 100644
--- a/templates/install/ipsilon.conf
+++ b/templates/install/ipsilon.conf
@@ -1,5 +1,6 @@
 [global]
 debug = ${debugging}
+tools.log_request_response.on = False
 template_dir = "templates"
 
 log.screen = ${debugging}
-- 
2.20.1