# plotavg.py
# Mark Rowan, School of Computer Science, University of Birmingham, UK
# December 2012
#
# For plotting 'average of averages' of activity and scale when a number
# of runs with identical parameters, but different random seeds, have been
# done.
#
# Given a filepath, this simple script checks every entry directly inside it
# for a pair of 'activity.npz' and 'scale.npz' files (which contain activity
# and scale factor data as plotted in the PDFs).
#
# It calculates the mean and std of the various runs' data and writes the
# result into the given filepath as scale.pdf / activity.pdf (plots) and
# scale.npz / activity.npz (arrays 'x', 'y' = mean, 'err' = std).
#
# Usage: plotavg.py <filepath>
#
# NOTE: every print call below takes exactly one argument, so the script runs
# unchanged under both Python 2 and Python 3.

import sys
import os

import numpy as np


def stack_run(stack, run):
    """Return `stack` with `run` appended as one more row.

    `stack` is either empty (no runs collected yet) or holds one row per run.
    `run` is the y-data of a single run and is flattened to 1-D. Runs of
    different lengths are reconciled by zero-padding on the right: a short
    new run is padded up to the stack width, and if the new run is longer
    than the stack, the rows already in the stack are padded instead.

    The result is always 2-D, even for the very first run, so that per-column
    statistics over axis 0 are well defined with a single run.
    """
    run = np.ravel(run)
    if np.size(stack) < 1:
        return run.reshape(1, -1)

    stack = np.atleast_2d(stack)
    shortfall = stack.shape[1] - run.size
    if shortfall > 0:
        # New run is shorter than the runs collected so far.
        run = np.hstack((run, np.zeros(shortfall)))
        print("Padded by %d" % shortfall)
    elif shortfall < 0:
        # New run is longer: widen every previously collected row.
        filler = np.zeros((stack.shape[0], -shortfall))
        stack = np.hstack((stack, filler))
        print("Padded earlier runs by %d" % -shortfall)
    return np.vstack((stack, run))


def longest_axis(current, candidate):
    """Return whichever of the two x-axes has more elements (ties keep `current`)."""
    if np.size(current) < np.size(candidate):
        return candidate
    return current


def mean_and_std(stack):
    """Return (mean, std) taken across runs, i.e. over axis 0 of `stack`.

    A 1-D input is treated as a single run, giving that run back as the mean
    and zeros as the std, rather than collapsing it to one scalar.
    """
    stack = np.atleast_2d(stack)
    return np.mean(stack, 0), np.std(stack, 0)


def load_runs(filepath):
    """Collect scale and activity data from every entry inside `filepath`.

    An entry is used only if it contains both 'scale.npz' and 'activity.npz';
    otherwise a message is printed and the entry is skipped. The 'y' array of
    each file becomes one row of the corresponding stack. The x-axis is taken
    from the 'x' array of the scale files, keeping the longest one seen.

    Returns a tuple (x, scale, activity). All three are empty arrays when no
    usable entry was found.
    """
    dirlist = os.listdir(filepath)
    print(dirlist)

    scale = np.array([])
    activity = np.array([])
    x = np.array([])

    for subdir in dirlist:
        scalefile = "%s/%s/scale.npz" % (filepath, subdir)
        activityfile = "%s/%s/activity.npz" % (filepath, subdir)

        if not (os.path.isfile(scalefile) and os.path.isfile(activityfile)):
            # Report the problem but carry on with the remaining entries.
            print("Missing scale.npz or activity.npz in %s" % subdir)
            continue

        print(scalefile)
        # The context manager closes the underlying archive once the arrays
        # have been read into memory.
        with np.load(scalefile) as scaledata:
            run_x = scaledata['x']
            run_scale = scaledata['y']
        print("Scale size: %d" % np.size(run_scale))
        scale = stack_run(scale, run_scale)
        x = longest_axis(x, run_x)

        print(activityfile)
        with np.load(activityfile) as activitydata:
            run_activity = activitydata['y']
        print("Activity size: %d" % np.size(run_activity))
        activity = stack_run(activity, run_activity)

    return x, scale, activity


def save_summary(filepath, name, ylabel, x, stack):
    """Plot mean +/- std of `stack` against `x` and save plot and data.

    Writes '<filepath>/<name>.pdf' at 300 DPI and '<filepath>/<name>.npz'
    holding 'x', 'y' (mean) and 'err' (std), then clears the current figure.
    """
    # Imported here rather than at module level so that the numeric helpers
    # above can be imported without matplotlib being involved.
    import matplotlib
    # Prevent pyplot from trying to open a graphical front-end on headless
    # clients.
    matplotlib.use('agg')
    from matplotlib import pyplot

    mean, err = mean_and_std(stack)
    pyplot.errorbar(x, mean, err, ecolor='grey', linestyle='-', marker='.',
                    markersize=1.0)
    pyplot.xlabel("Time (days)")
    pyplot.ylabel(ylabel)
    pyplot.savefig("%s/%s.pdf" % (filepath, name), dpi=300, format="pdf")
    np.savez("%s/%s" % (filepath, name), x=x, y=mean, err=err)
    pyplot.clf()  # Clear plot so the next summary starts from a blank figure


def main(argv=None):
    """Run the script; `argv` defaults to sys.argv.

    argv[0] is the name of the script, argv[1] is the filepath to scan.
    Returns 0 on success, 1 if no usable runs were found and 2 if the
    filepath argument is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("Usage: plotavg.py <filepath>\n")
        return 2

    filepath = argv[1]
    print("\nLoading data from %s" % filepath)

    x, scale, activity = load_runs(filepath)
    if np.size(scale) < 1 or np.size(activity) < 1:
        sys.stderr.write(
            "No scale.npz/activity.npz pairs found under %s\n" % filepath)
        return 1

    # Remove NaNs from data (np.nan_to_num replaces them with zero)
    print("Removing NaNs")
    print("%d from scale" % np.sum(np.isnan(scale)))
    scale = np.nan_to_num(scale)
    print("%d from activity" % np.sum(np.isnan(activity)))
    activity = np.nan_to_num(activity)

    save_summary(filepath, "scale", "Scale factor", x, scale)
    save_summary(filepath, "activity", "Activity (Hz)", x, activity)
    return 0


if __name__ == "__main__":
    sys.exit(main())