====== Introduction ====== The web reporter script is a command-line tool that parses the web root of any Evergreen installation (or a development web root) and reports which files depend upon each other, e.g. JavaScript files that import dojo widgets depend upon the files that create those widgets, and CSS files that reference images depend upon those images. Note that not all dependencies are currently detected (CSS handling is the most notable gap); the script does a good job, but it isn't perfect. ====== Usage ====== Generates an HTML reference for Evergreen website dependancies. Options: --version show program's version number and exit -h, --help show this help message and exit -e PATH, --install=PATH specifies the location of the Evergreen install (default: /openils/var/web) -o PATH, --output=PATH specifies the location for the output (default: pwd) -w, --webbrowser open the output once done -q, --quiet be quiet (default: verbose) ====== Source ====== This script should be placed in a file named [[http://en.wikipedia.org/wiki/Hercule_Poirot|poirot_0.1.py]] #!/usr/bin/env python '''Generates the widget factory for Evergreen. Copyright 2011 Joseph Lewis joehms22 [-at-] gmail [-dot-] com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
''' # end of the module docstring '''

import optparse
import os
import re
import mimetypes
import hashlib
import time

__author__ = "Joseph Lewis"
__copyright__ = "Copyright 2011, Joseph Lewis"
__license__ = "GPL"
__version__ = "Poirot 0.1"

# The regular expressions for matching dojo.provide, dojo.require, and
# src='foo/bar/baz' as well as css url(...) references.
# The dots in "dojo.provide"/"dojo.require" are escaped so they only match a
# literal dot, and "-" is accepted in src paths (e.g. "jquery-1.4.js").
PROVIDES = re.compile(r"""\s*dojo\.provide\s*\(\s*['"]([\w.]+)['"]\s*\)\s*;\s*""")
REQUIRES = re.compile(r"""\s*dojo\.require\s*\(\s*['"]([\w.]+)['"]\s*\)\s*;\s*""")
INCLUDES = re.compile(r"""src\s*=\s*['"]([\w./-]+)['"]""")
CSS_INCL = re.compile(r"""url\s*\(\s*['"]*([^'")]+)['"]*\s*\)[;]*""")

sourcefile_map = {}  # Maps every name (path or widget name) to its SourceFile.
DATE = time.strftime("%Y-%m-%d")
options = None  # The options provided by optparse (set in main()).

# USED FOR TESTING PURPOSES SO TYPING THE ARGS WOULDN'T BE NECESSARY
# EVERY TIME.
DEBUG = False
if DEBUG:
    import sys
    # optparse reads sys.argv[1:], so a program name has to come first or
    # the first real option is silently dropped.
    sys.argv = ['poirot', '-q', '-e', '/home/joseph/git/joelewis/Open-ILS/web']
#/END TEST/


def _escape(text):
    '''Returns text with the characters that are special in HTML (both in
    element content and in quoted attribute values) replaced by entities.
    '''
    return (text.replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;')
                .replace('"', '&quot;')
                .replace("'", '&#39;'))


def list_files(directory):
    '''Returns a list of all files (with full paths) below a directory;
    sub directories are always traversed.

    Arguments:
        directory - A string representing the directory to traverse.
    '''
    if not options.quiet:
        print ("Finding all files in directory: %s" % (directory))

    file_list = []
    for root, _subdirs, filenames in os.walk(directory):
        for filename in filenames:
            file_list.append(os.path.join(root, filename))
    return file_list


class SourceFile:
    ''' Represents a file of source code.

    Attributes:
        path - The path to the file, relative to the web root when the file
               lives below options.install.
        base - The basename of this file (including extension)
        ext - The lower-cased extension of this file or the lower-cased
              basename if no ext.
        anchor - The anchor used to refer to this file on a webpage.
        src - The path to the source file of the code, as given.
        widget_depends - The widgets this file depends upon.
        file_depends - The files this file depends upon (img tags mostly)
        js_depends - The javascripts this file depends upon.
        provides - The dojo widgets this file provides.
    '''

    def __init__(self, path):
        '''Records the names of the file at path and scans it for
        dojo.provide / dojo.require calls, src='...' attributes and css
        url(...) references.  A file that cannot be read is reported on
        stdout and left without dependencies.
        '''
        self.src = path

        # If the path is abs, make it relative to the web root.
        if path.startswith(options.install):
            self.path = path[len(options.install):]
        else:
            self.path = path

        # Normalize the path so we don't have things like: "/../../hi"
        self.path = os.path.normpath(self.path)
        self.base = os.path.basename(path)
        # Split the basename, not the whole path, so a dot in a directory
        # name is never mistaken for the start of the extension.
        self.ext = self.base.rsplit('.', 1)[-1].lower()

        # Create a theoretically unique anchor for referencing this
        # widget on the webpage.  md5 needs bytes, not text.
        self.anchor = hashlib.md5(os.fsencode(self.path)).hexdigest()

        self.widget_depends = set()
        self.file_depends = set()
        self.js_depends = set()
        self.provides = set()

        # Parse the file we belong to (if possible).  The tree also holds
        # binary files (images), so undecodable bytes are replaced rather
        # than allowed to abort the scan.
        try:
            with open(path, encoding='utf-8', errors='replace') as me:
                for line in me:
                    self._scan_line(line)
        except IOError as e:
            print("Error Encountered: %s" % (e))

    def _scan_line(self, line):
        '''Records whatever a single line of the file provides or uses.'''
        provided = PROVIDES.match(line)
        if provided:
            self.provides.add(provided.group(1))
            return

        required = REQUIRES.match(line)
        if required:
            self.widget_depends.add(required.group(1))
            return

        # A line can reference several resources (minified markup, several
        # tags on one line), so collect every match, not just the first.
        for found in INCLUDES.finditer(line):
            self.__add_resource(found.group(1))
        for found in CSS_INCL.finditer(line):
            self.__add_resource(found.group(1))

    def __add_resource(self, path):
        ''' Adds a resource with the given file path to the proper
        resource set; also cleans up the path.
        '''
        # Make this an abs path relative to this object if it isn't one.
        if not os.path.isabs(path):
            path = os.path.join(os.path.dirname(self.path), path)

        path = os.path.normpath(path)

        # If the resource is a javascript put it in that category.
        if path.lower().endswith('.js'):
            self.js_depends.add(path)
        else:
            # If it is an iframe, or image put it in the general depends.
            self.file_depends.add(path)

    def names(self):
        '''Returns a list of names this item has. i.e. a widget might
        have a name 'dojo.dijit.foo.bar' and a file might have the name
        '/foo/bar/baz' the names for this class would then be
        ['/foo/bar/baz', 'dojo.dijit.foo.bar'] (the path always first).
        '''
        return [self.path] + list(self.provides)

    def mainNames(self):
        '''Returns the main names of this item, that is the names of the
        type this item is.

        If the item is a widget returns the set of widgets it provides.
        If the item is a file or script, returns a list holding the path.
        '''
        if self.iswidget():
            return self.provides
        return [self.path]

    def depends(self):
        '''Returns a set of all direct dependencies of this file.'''
        return self.widget_depends | self.file_depends | self.js_depends

    def iswidget(self, key=None):
        '''Returns true if this is a widget.  If a key is provided, this
        function checks if the given name for the item points to a widget
        or the file itself: i.e. key='dojox.grid' would return True for the
        file providing it, while /foo/bar/baz/dojo.js would return False.
        '''
        if key:
            return key in self.provides
        return bool(self.provides)

    def isscript(self):
        '''Returns true if this file has a "js" extension.'''
        return self.ext.lower() == 'js'

    def isfile(self):
        '''Returns true if this is neither a widget nor a script.'''
        return (not self.iswidget() and not self.isscript())

    def isempty(self):
        '''Returns true if nothing is provided or used by this file.'''
        return not (self.provides or self.widget_depends
                    or self.file_depends or self.js_depends)

    def src_link(self):
        '''Provides a link to the source of this file.'''
        # NOTE(review): the original markup was stripped from this page;
        # the tag below is a reconstruction - confirm against upstream.
        return "<a href='%s'>[Source]</a>" % (_escape(self.src))

    def get_link(self, name, src=True):
        '''Provides a link to this file's entry in the report.

        Params:
            name - The text to show as this link.
            src - Whether or not to include the [Source] link,
                  (Default: True)
        '''
        sl = ""
        if src:
            sl = self.src_link()
        # NOTE(review): reconstructed markup, see src_link().
        return "%s <a href='#%s'>%s</a>" % (sl, self.anchor, _escape(name))

    def _make_list(self, title, items, link=True):
        '''Creates and returns a title and HTML list of the items given.
        If the list is empty, returns an empty string.

        When link is true each item that is known to sourcefile_map is
        rendered as a link to its entry; unknown items (and all items when
        link is false) are rendered as plain text.
        '''
        if not items:
            return ""

        # NOTE(review): reconstructed markup, see src_link().
        output = "\n\n<h3>%s</h3>\n<ul>" % (_escape(title))
        for item in sorted(items):
            target = source(item) if link else None
            if target:
                entry = target.get_link(item)
            else:
                entry = _escape(item)
            output += "\n\t<li>%s</li>" % (entry)
        output += "\n\t</ul>\n\n"
        return output

    def toHTML(self):
        '''Returns the report entry for this file: heading, source link,
        the provided / used / indirectly used / used-by lists and, for
        images, a preview.
        '''
        # NOTE(review): reconstructed markup, see src_link().
        out = "\n\n<div class='item'>"
        out += "<h2><a name='%s' href='#%s'>%s</a></h2>" % (
            self.anchor, self.anchor, _escape(self.path))
        out += "<p>%s</p>" % (self.src_link())

        out += self._make_list("Provides Widgets:", self.provides, False)
        out += self._make_list("Uses Widgets:", self.widget_depends)
        out += self._make_list("Uses Scripts:", self.js_depends)
        out += self._make_list("Uses Resources:", self.file_depends)
        out += self._make_list("Indirectly Uses Resources:",
                               set(self.list_indirects()) - self.depends())

        # Everything that depends on any of this item's names.
        used_by = set()
        for name in self.names():
            used_by |= reverseMap(name)
        out += self._make_list("Used By:", used_by)

        # guess_type() returns None as the type when it cannot tell.
        mime = mimetypes.guess_type(self.src)[0]
        if mime and mime.startswith('image'):
            out += "<h3>Preview:</h3><img src='%s' alt='preview' />" % (
                _escape(self.src))

        out += "</div>"
        return out

    def list_indirects(self):
        ''' Creates a set of every dependency reachable from this file,
        i.e. a depends on b and b depends on c, so a also depends on c.
        The direct dependencies are part of the result.  These will all be
        the string representation of the items, which can be converted to
        the actual SourceFile by using the source() function.
        '''
        tocheck = self.depends()
        checked = set()

        while tocheck:
            checking = tocheck.pop()
            checked.add(checking)
            checkingsrc = source(checking)

            # Dependencies outside the parsed tree cannot be followed.
            if not checkingsrc:
                continue

            for item in checkingsrc.depends():
                if item not in checked:
                    tocheck.add(item)

        return checked


def parse_files():
    '''Parses every file below options.install and registers it in
    sourcefile_map under each of its names.
    '''
    for f in list_files(options.install):
        tmp = SourceFile(f)

        if not options.quiet:
            print ("Parsed: %s" % (tmp.path))

        for name in tmp.names():
            sourcefile_map[name] = tmp
            if not options.quiet:
                print ("\tFound Item: %s" % (name))

    # The reverse lookup table is derived from sourcefile_map; drop it so
    # the next reverseMap() call rebuilds it from the new contents.
    revMap.clear()


revMap = {}


def reverseMap(item, reset=False):
    '''Tells an object all of those items that depend upon it, the first
    time this is run it generates the table, after that simple lookups
    are made.  If the tables have been updated, call reset=true in the
    params to force an update.

    PARAMS:
        item - A string representing the item to search for.

    RETURN:
        A set of the main names of all the items that depend on this one
        (empty if there are none).
    '''
    global revMap

    if reset or not revMap:
        # Create Associations
        revMap = {}
        for sf in sourcefile_map.values():
            names = sf.mainNames()
            for d in sf.depends():
                revMap.setdefault(d, set()).update(names)

    return revMap.get(item, set())


def source(key):
    '''Returns the source file with the given key, or None if none
    found.
    '''
    return sourcefile_map.get(key)


def output():
    '''Returns the complete HTML report as a string.'''
    title = "Evergreen Widget Report: %s" % (DATE)

    # NOTE(review): the original markup and stylesheet were stripped from
    # this page; this is a minimal reconstruction - confirm against
    # upstream.
    parts = ["<html>\n<head>\n<meta charset='utf-8'>\n<title>%s</title>"
             % (_escape(title))]
    parts.append("""
<style type='text/css'>
body { font-family: sans-serif; }
.item { border-top: 1px solid #ccc; margin-top: 1em; }
</style>
</head>
<body>""")
    parts.append("<h1>%s</h1>\n<h2>Table of Contents</h2>" % (_escape(title)))
    parts.append("<div>%s</div>" % gen_toc())

    # A file is registered once per name, so de-duplicate before rendering
    # or widget files would appear several times.
    unique_files = set(sourcefile_map.values())
    for sf in sorted(unique_files, key=lambda item: item.path):
        parts.append(sf.toHTML())

    parts.append("""
<hr />
<p>Generated by %s, written by %s, on %s. License: %s</p>"""
                 % (__version__, __author__, DATE, __license__))
    parts.append("</body>\n</html>")
    return "\n".join(parts)


def gen_toc():
    '''Generates the table of contents.'''
    widget_map = {}
    script_map = {}
    file_map = {}

    for key, value in sourcefile_map.items():
        if value.iswidget(key):
            widget_map[key] = value
        elif value.isscript():
            script_map[key] = value
        elif value.isfile():
            file_map[key] = value

    # NOTE(review): reconstructed markup, only the three headings survived
    # on this page.
    chunks = []
    for heading, entries in (("Widgets", widget_map),
                             ("Scripts", script_map),
                             ("Files", file_map)):
        chunks.append("<h3>%s</h3>\n<ul>" % (heading))
        for key in sorted(entries):
            chunks.append("\t<li>%s</li>"
                          % (entries[key].get_link(key, src=False)))
        chunks.append("</ul>")
    return "\n".join(chunks)


def main():
    '''Runs program and handles command line options'''
    global options

    p = optparse.OptionParser(description='Generates an HTML reference for Evergreen website dependancies.',
                              prog='poirot',
                              version=__version__,
                              usage='%prog')
    p.add_option('-e', '--install',
                 help='specifies the location of the Evergreen install (default: /openils/var/web)',
                 default='/openils/var/web', metavar='PATH')
    p.add_option('-o', '--output',
                 help='specifies the location for the output (default: pwd)',
                 metavar='PATH')
    p.add_option('-w', '--webbrowser', action='store_true',
                 help='open the output once done', default=False)
    p.add_option('-q', '--quiet', action='store_true',
                 help='be quiet (default: verbose)', default=False)

    options, _arguments = p.parse_args()

    # Strip any trailing slash so every web path keeps its leading "/".
    options.install = os.path.abspath(options.install)

    # Look for all of the places the widgets are used.
    parse_files()

    # Generate output, inside the directory given with -o if there is one.
    filename = "evergreen_widget_report_%s.html" % (DATE)
    if options.output:
        filename = os.path.join(options.output, filename)

    try:
        with open(filename, 'w', encoding='utf-8', errors='replace') as out:
            out.write(output())
    except IOError:
        print("Error, can't write to the output file. %s" % (filename))
        return  # Nothing to show in the browser.

    # Open webbrowser.
    if options.webbrowser:
        import webbrowser
        webbrowser.open(os.path.abspath(filename))


if __name__ == '__main__':
    main()