Introduction

The web reporter script is a command-line tool that parses the web root of any Evergreen installation (or a development web root) and reports which files depend upon each other; e.g. JavaScript files that import Dojo widgets depend upon the files that define those widgets, CSS that references images depends upon those images, and so on. Note that not all dependencies are currently detected, CSS being the most notable gap: the script does a good job, but it isn't perfect.

Usage

Generates an HTML reference for Evergreen website dependancies.

Options:
  --version             show program's version number and exit
  -h, --help            show this help message and exit
  -e PATH, --install=PATH
                        specifies the location of the Evergreen install
                        (default: /openils/var/web)
  -o PATH, --output=PATH
                        specifies the location for the output (default: pwd)
  -w, --webbrowser      open the output once done
  -q, --quiet           be quiet (default: verbose)

Source

This script should be placed in a file named poirot_0.1.py

#!/usr/bin/env python
'''Generates an HTML dependency report for an Evergreen web root.

Copyright 2011 Joseph Lewis joehms22 [-at-] gmail [-dot-] com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA.

'''

import optparse
import os
import re
import mimetypes
import hashlib
import time


# Module metadata. Besides describing the script, __version__ is handed to
# optparse as the --version string, and __version__, __author__ and
# __license__ are all printed in the footer of the generated report.
__author__ = "Joseph Lewis"
__copyright__ = "Copyright 2011, Joseph Lewis"
__license__ = "GPL"
__version__ = "Poirot 0.1"


# Regular expressions used to find dependencies in a single line of text.
# Each one captures the interesting name or path in group 1.
#
#   PROVIDES - dojo.provide('some.widget');   (matched from line start)
#   REQUIRES - dojo.require('some.widget');   (matched from line start)
#   INCLUDES - src='foo/bar/baz'              (searched anywhere in the line)
#   CSS_INCL - url(foo/bar.png)               (searched anywhere in the line)
#
# The dot in "dojo.provide" / "dojo.require" is escaped so that only a
# literal '.' matches, and INCLUDES accepts '-' so that hyphenated file
# names such as "jquery-1.4.js" are picked up.
PROVIDES = re.compile(r"""\s*dojo\.provide\s*\(\s*['"]([\w.]+)['"]\s*\)\s*;\s*""")
REQUIRES = re.compile(r"""\s*dojo\.require\s*\(\s*['"]([\w.]+)['"]\s*\)\s*;\s*""")
INCLUDES = re.compile(r"""src\s*=\s*['"]([\w._/-]+)['"]""")
CSS_INCL = re.compile(r"""url\s*\(\s*['"]*([^'")]+)['"]*\s*\)[;]*""")

# Maps every name of a parsed file (its path and each widget it provides)
# to the SourceFile object for that file; filled in by parse_files().
sourcefile_map = {}

# Date stamp used in the report title, footer and output file name.
DATE = time.strftime("%Y-%m-%d")

options = None  # The options provided by optparse; set by main()


# USED FOR TESTING PURPOSES SO TYPING THE ARGS WOULDN'T BE NECESSARY
# EVERY TIME.  Set DEBUG to True to run with the canned arguments below.
DEBUG = False
if DEBUG:
    import sys
    # optparse parses sys.argv[1:], so the first element must be a
    # program-name placeholder; otherwise the first real option is dropped.
    sys.argv = ['poirot', '-q', '-e', '/home/joseph/git/joelewis/Open-ILS/web']

#/END TEST/


def list_files(directory):
    '''Walks a directory tree and collects the path of every file in it.

    All sub directories are descended into. Unless options.quiet is
    set, a progress message naming the directory is printed first.

    Arguments:
        directory - A string representing the directory to traverse.

    Returns:
        A list of file paths, each one the walked directory joined with
        the file name.

    '''
    if not options.quiet:
        print ("Finding all files in directory: %s" % (directory))

    return [os.path.join(root, name)
            for root, _subdirs, names in os.walk(directory)
            for name in names]


class SourceFile:
    '''
    Represents one file found under the web root, together with the
    dependencies discovered by scanning its text line by line.

    Attributes:
        src    - The path exactly as it was passed to the constructor.
        path   - The path with the options.install prefix removed (when
                 present) and then normalized with os.path.normpath.
        base   - The basename of this file (including extension).
        ext    - The lower-cased text after the last '.' of the basename,
                 or the whole lower-cased basename if it has no '.'.
        anchor - The md5 hex digest of path; used as the HTML anchor of
                 this file's section in the report.

        widget_depends - The widgets this file depends upon
                         (dojo.require).
        file_depends   - The files this file depends upon (img tags
                         mostly).
        js_depends     - The javascripts this file depends upon.
        provides       - The dojo widgets this file provides
                         (dojo.provide).

    '''
    def __init__(self, path):
        '''Records the path information and scans the file at path.

        Arguments:
            path - The location of the file to read.

        '''
        self.src = path

        # If the path is abs, make it relative to the web root.
        if path.startswith(options.install):
            self.path = path[len(options.install):]
        else:
            self.path = path

        # Normalize the path so we don't have things like: "/../../hi"
        self.path = os.path.normpath(self.path)

        self.base = os.path.basename(path)
        # Split the basename, not the whole path, so that a '.' in a
        # directory name is never mistaken for the start of an extension.
        self.ext = self.base.split('.')[-1].lower()

        # Create a theoretically unique anchor for referencing this
        # widget on the webpage.  md5 needs bytes, so text is encoded.
        digest_input = self.path
        if not isinstance(digest_input, bytes):
            digest_input = digest_input.encode('utf-8')
        self.anchor = hashlib.md5(digest_input).hexdigest()

        self.widget_depends = set()
        self.file_depends = set()
        self.js_depends = set()
        self.provides = set()

        # Parse the file we belong to (if possible)
        self._scan(path)

    def _scan(self, path):
        '''Reads the file at path and records what every line declares.

        A file that cannot be opened or read is reported on stdout and
        otherwise ignored; a file whose contents cannot be decoded as
        text (an image, for example) is silently left without
        dependencies.

        '''
        try:
            with open(path) as handle:
                for line in handle:
                    self._scan_line(line)
        except IOError as e:
            print("Error Encountered: %s" % (e))
        except UnicodeError:
            # Not a text file; there is nothing in it to parse.
            pass

    def _scan_line(self, line):
        '''Records the first kind of dependency found on a line.

        The checks run in a fixed order (provide, require, src=, url())
        and stop at the first one that matches.

        '''
        provided = PROVIDES.match(line)
        if provided:
            self.provides.add(provided.group(1))
            return

        required = REQUIRES.match(line)
        if required:
            self.widget_depends.add(required.group(1))
            return

        included = INCLUDES.search(line)
        if included:
            self.__add_resource(included.group(1))
            return

        css = CSS_INCL.search(line)
        if css:
            self.__add_resource(css.group(1))

    def __add_resource(self, path):
        ''' Adds a resource with the given file path to the proper
        resource set; also cleans up the path.

        '''

        # A relative reference is resolved against the directory that
        # holds this file.
        if not os.path.isabs(path):
            path = os.path.join(os.path.dirname(self.path), path)

        path = os.path.normpath(path)

        # If the resource is a javascript put it in that category.  The
        # dot is part of the test so a name like "nojs" is not a script.
        if path.lower().endswith('.js'):
            self.js_depends.add(path)
        else:  # If it is an iframe, or image put it in the general depends.
            self.file_depends.add(path)

    def names(self):
        '''Returns a list of names this item has.
        i.e. a widget might have a name 'dojo.dijit.foo.bar'
        and a file might have the name '/foo/bar/baz' the names for
        this class would then be
        ['/foo/bar/baz', 'dojo.dijit.foo.bar']

        The path always comes first, followed by the provided widgets.

        '''
        return [self.path] + list(self.provides)

    def mainNames(self):
        '''Returns the main names of this item, that is the names of
        the type this item is.

        If the item is a widget returns the set of widgets it provides.
        If the item is a file or script, returns a one-element list
        holding its path.

        '''
        if self.iswidget():
            return self.provides
        return [self.path]

    def depends(self):
        '''Returns a set of all direct dependencies of this file.

        '''
        return self.widget_depends | self.file_depends | self.js_depends

    def iswidget(self, key=None):
        '''Returns true if this is a widget. If a key is provided, this
        function checks if the given name for the item points to a
        widget or the file itself: i.e. key='dojox.grid' would return
        True, while /foo/bar/baz/dojo.js would return False.

        '''
        if key:
            return bool(self.provides) and '/' not in key
        return bool(self.provides)

    def isscript(self):
        '''Returns true if the extension of this file is 'js'.'''
        return self.ext.lower() == 'js'

    def isfile(self):
        '''Returns true if this is neither a widget nor a script.'''
        return (not self.iswidget() and not self.isscript())

    def isempty(self):
        '''Returns true if nothing was found in this file: it provides
        nothing and depends on nothing.

        '''
        if self.provides or self.widget_depends or self.file_depends or self.js_depends:
            return False
        return True

    def src_link(self):
        '''Provides a link to the source of this file.'''
        return "<a class='warn' href='%s'>[Source]</a>" % (self.src)

    def get_link(self, name, src=True):
        '''Provides a link to this file. The param name gives what the
        link should say (inside the <a> tags).

        Params:
            name - The text to show as this link.
            src - Whether or not to include the [Source] link,
                  (Default: True)

        '''
        sl = ""
        if src:
            sl = self.src_link()

        return "%s <a href='#%s'>%s</a>" % (sl, self.anchor, name)

    def _make_list(self, title, items, link=True):
        '''Creates and returns a title and HTML list of the widgets
        given, sorted.

        If the list is empty, returns an empty string.

        Params:
            title - The heading to put above the list.
            items - The names to list.
            link - Whether each name should be looked up with source()
                   and turned into a link (Default: True). Names with no
                   known source are marked [No Src].

        '''

        if not items:
            return ""

        output = "\n\n<h3>%s</h3>\n\t<ul>\n" % (title)

        for item in sorted(items):
            if link:
                owner = source(item)
                if owner:
                    internal = owner.get_link(item)
                else:
                    internal = "<span class='warn'>[No Src]</span> %s" % (item)
            else:
                internal = item

            output += "\t\t<li>%s</li>\n" % (internal)

        output += "\t</ul>\n\n"
        return output

    def toHTML(self):
        '''Returns the HTML section of the report describing this file:
        what it provides, what it uses directly and indirectly, which
        items use it, and a preview if it is an image.

        '''
        out = "\n\n<div class='fileinfo'>"
        out += "<a name='%s' href='#%s'><h2>%s</h2></a>" % (self.anchor, self.anchor, self.path)
        out += "<div class='toolbar'>%s</div>" % (self.src_link())

        out += self._make_list("Provides Widgets:", self.provides, False)
        out += self._make_list("Uses Widgets:", self.widget_depends)
        out += self._make_list("Uses Scripts:", self.js_depends)
        out += self._make_list("Uses Resources:", self.file_depends)
        # list_indirects() also contains the direct dependencies, so
        # they are removed to leave only the indirect ones.
        out += self._make_list("Indirectly Uses Resources:",
                               set(self.list_indirects()) - self.depends())

        first_rev = True
        for name in sorted(self.names()):
            users = reverseMap(name)
            if not users:
                continue

            if first_rev:
                out += "<h3>Used By:</h3><ul>"
                first_rev = False
            out += "<li><b>Files that reference:</b> %s</li><ul>" % name
            for user in sorted(users):
                sf = source(user)
                if sf:
                    out += "<li>%s</li>" % sf.get_link(user)
                else:
                    out += "<li><span class='warn'>[No Src]</span> %s</li>" % (user)
            out += "</ul>"
        if not first_rev:
            out += "</ul>"

        # guess_type gives None as the type when it cannot tell.
        mime = mimetypes.guess_type(self.src)[0]
        if mime and mime.startswith('image'):
            out += "<h3>Preview:</h3><img class='prev' src='%s'></img>" % (self.src)

        out += "</div>"
        return out

    def list_indirects(self):
        '''
        Creates a set of all dependancies reachable from this file,
        i.e. a depends on b and b depends on c, so a also depends on c.
        The direct dependencies are part of the result.

        These will all be the string representation of the widgets
        which can be converted to the actual widgets by using the
        source() function.

        '''

        # The union is a new set, so popping from it leaves the
        # attribute sets untouched.
        tocheck = self.widget_depends | self.file_depends | self.js_depends
        checked = set()

        while tocheck:
            checking = tocheck.pop()
            checked.add(checking)

            checkingsrc = source(checking)

            if not checkingsrc:
                continue

            for item in checkingsrc.depends():
                if item not in checked:
                    tocheck.add(item)

        return checked


def parse_files():
    '''Parses every file below options.install and registers it.

    Each file becomes a SourceFile, which is then stored in
    sourcefile_map once for every name it reports. Progress is printed
    unless options.quiet is set.

    '''
    for filepath in list_files(options.install):
        parsed = SourceFile(filepath)
        verbose = not options.quiet

        if verbose:
            print ("Parsed: %s" % (parsed.path))

        for alias in parsed.names():
            sourcefile_map[alias] = parsed
            if verbose:
                print ("\tFound Item: %s" % (alias))

revMap = {}
def reverseMap(item, reset=False):
    '''Looks up which items depend upon the given item.

    The lookup table is built from sourcefile_map the first time this
    is called (and again whenever the table is empty); later calls are
    plain lookups. Pass reset=True to force the table to be rebuilt
    after sourcefile_map has changed.

    PARAMS:
        item - A string representing the item to search for.
        reset - Rebuild the table before looking up (Default: False)
    RETURN:
        A set of the main names of all items that depend on this one;
        an empty set if there are none.

    '''
    global revMap

    if reset or not revMap:
        # Start over and record, for every dependency, who uses it.
        revMap = {}
        for user in sourcefile_map.values():
            for dependency in user.depends():
                for owner_name in user.mainNames():
                    if dependency not in revMap:
                        revMap[dependency] = set()
                    revMap[dependency].add(owner_name)

    if item in revMap:
        return revMap[item]
    return set()


def source(key):
    '''Looks up key in sourcefile_map.

    Returns the source file registered under that key, or None when
    the key is unknown.

    '''
    return sourcefile_map.get(key)


def output():
    '''Builds the complete HTML report and returns it as one string.

    The page consists of a header with embedded CSS, the table of
    contents from gen_toc(), one section per parsed file and a footer.

    A file is stored in sourcefile_map once per name (its path plus
    every widget it provides), so the values are de-duplicated first;
    otherwise the same section, with the same anchor, would be emitted
    several times. Sections are ordered by path so that the report is
    the same from run to run.

    '''
    title = "Evergreen Widget Report: %s" % (DATE)
    parts = ["<html><head><title>%s</title>" % (title)]
    parts.append("""<style>
            body { margin:0px; padding:0px; font-family:arial,helvetica; width:100%;} 
            h1 {text-align:center; background-color:#399475; color:#fff;} 
            .fileinfo h2 { color:#FFF; background-color:#399475; padding:5px; margin:0px;}
            a { text-decoration: none; }
            #toc ul { width: 100%; }
            #toc ul li { float: left; width: 50%; }
            #toc h3 { clear:left;}
            #toc br {clear:left;}
            .toolbar { display:block; background-color:#493a94; width:100%; padding:0px;}
            .toolbar a {color:#fff; padding:5px;}
            .prev { align:middle; margin:5px;}
            .warn { color:#BFBFBF; font-family:monospace;}
            #footer { text-align:center; background-color:#BFBFBF; width:100%;}


            </style></head>""")
    parts.append("<body><h1>%s</h1><h2>Table of Contents</h2>" % (title))
    parts.append("<div id='toc'>%s</div>" % gen_toc())

    # Keep one entry per file object, however many names point at it.
    seen = set()
    unique_files = []
    for sf in sourcefile_map.values():
        if id(sf) not in seen:
            seen.add(id(sf))
            unique_files.append(sf)
    unique_files.sort(key=lambda sf: sf.path)

    for sf in unique_files:
        parts.append(sf.toHTML())

    parts.append("""<div id='footer'>Generated by %s<br>
            &copy; %s %s <br> 
            License: %s </div>""" % (__version__, __author__, DATE, __license__))
    parts.append("</body></html>")

    return "".join(parts)


def gen_toc():
    '''Generates the table of contents.

    Every entry of sourcefile_map is filed under Widgets, Scripts or
    Files (checked in that order). Widgets are listed sorted by name
    and link with that name; scripts and files are listed sorted by
    basename and link with the basename. No [Source] links are shown.

    '''
    buckets = {'widget': {}, 'script': {}, 'file': {}}

    for name, entry in sourcefile_map.items():
        if entry.iswidget(name):
            kind = 'widget'
        elif entry.isscript():
            kind = 'script'
        elif entry.isfile():
            kind = 'file'
        else:
            continue
        buckets[kind][name] = entry

    def by_base(bucket):
        # Order the names of a bucket by the basename of their file.
        return sorted(bucket.keys(), key=lambda name: bucket[name].base)

    widgets = buckets['widget']
    scripts = buckets['script']
    files = buckets['file']

    pieces = ["<h3>Widgets</h3>\n<ul>"]
    for name in sorted(widgets.keys()):
        pieces.append("<li>%s</li>\n" % (widgets[name].get_link(name, False)))

    pieces.append("</ul><br>\n<h3>Scripts</h3>\n<ul>")
    for name in by_base(scripts):
        entry = scripts[name]
        pieces.append("<li>%s</li>\n" % (entry.get_link(entry.base, False)))

    pieces.append("</ul><br>\n<h3>Files</h3>\n<ul>")
    for name in by_base(files):
        entry = files[name]
        pieces.append("<li>%s</li>\n" % (entry.get_link(entry.base, False)))

    pieces.append("</ul><br>")

    return "".join(pieces)


def main():
    '''Runs program and handles command line options.

    Parses the command line into the module-level options, parses all
    files below the install location, and writes the report to
    "evergreen_widget_report_<date>.html". The report is placed in the
    directory given with --output, or in the current working directory
    when that option is absent. With --webbrowser the finished report
    is opened afterwards.

    If the report cannot be written a message is printed and the
    browser is not opened.

    '''
    global options

    p = optparse.OptionParser(description='Generates an HTML reference for Evergreen website dependancies.',
                                  prog='poirot',
                                  version=__version__,
                                  usage='%prog')

    p.add_option('-e', '--install',
                    help='specifies the location of the Evergreen install (default: /openils/var/web)',
                    default='/openils/var/web',
                    metavar='PATH')
    p.add_option('-o', '--output',
                    help='specifies the location for the output (default: pwd)',
                    metavar='PATH')
    p.add_option('-w', '--webbrowser', action='store_true', help='open the output once done', default=False)
    p.add_option('-q', '--quiet', action='store_true', help='be quiet (default: verbose)', default=False)

    # Positional arguments are accepted by optparse but not used.
    options, _arguments = p.parse_args()

    # Look for all of the places the widgets are used.
    parse_files()

    # Generate output.
    filename = "evergreen_widget_report_%s.html" % (DATE)
    if options.output:
        # Honor --output: it names the directory to put the report in.
        filename = os.path.join(options.output, filename)

    # Build the report before opening the file, so that a failure while
    # generating it cannot leave a truncated file behind.
    report = output()

    try:
        with open(filename, 'w') as out:
            out.write(report)
    except IOError:
        print("Error, can't write to the output file. %s" % (filename))
        return

    # Open webbrowser.
    if options.webbrowser:
        import webbrowser
        webbrowser.open(filename)

if __name__ == '__main__':
    main()