I wanted to share a cool code mashup I put together to show some statistics about Err usage around the world.
The result is here (in blue), roughly, after some unreliable homebrew data mining of the Apache logs:
I applied this tip
As dependencies, you are going to need:
Here is the full snippet:
#! /usr/bin/python
"""Plot, on a world map, where requests for 'err/version' come from.

The script scans every file in DIR whose name starts with 'gootz-access',
counts per file and per client address the log lines that contain
'err/version', geolocates each address with the GeoLiteCity database and
draws one bubble per (longitude, latitude) pair:

* blue bubbles: addresses seen at most 5 times in a given log file;
* red bubbles: addresses seen more than 5 times in a given log file.

NOTE(review): the variable names suggest blue = installations and
red = developers -- confirm with the author.
"""
import os
from datetime import datetime

import matplotlib
matplotlib.use('Qt4Agg')  # select the backend before pyplot gets imported

import GeoIP
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.basemap import Basemap

DIR = 'data/'


def _count_requests_by_ip(path):
    """Return {address: number of 'err/version' lines} for one log file."""
    counts = {}
    with open(path) as f:
        # Iterate lazily instead of loading the whole log with readlines().
        for line in f:
            if 'err/version' in line:
                # The client address is whatever precedes the first '-'.
                addr = line.split('-')[0].strip()
                counts[addr] = counts.get(addr, 0) + 1
    return counts


def _plot_counts(basemap, positions, color, zorder):
    """Draw one translucent bubble and one numeric label per location.

    basemap   -- the Basemap instance used to project and draw.
    positions -- dict mapping (longitude, latitude) to a count.
    color     -- matplotlib colour used for both bubbles and labels.
    zorder    -- plotting order of the bubbles; labels use zorder + 1.
    """
    if not positions:
        return  # nothing to project or draw

    entries = list(positions.items())
    lngs = [pos[0] for pos, _count in entries]
    lats = [pos[1] for pos, _count in entries]
    counts = [count for _pos, count in entries]

    # Project geographic coordinates to map coordinates.
    xs, ys = basemap(lngs, lats)

    basemap.scatter(
        xs,
        ys,
        s=[count * count for count in counts],  # size
        c=color,                                # color
        marker='o',                             # symbol
        alpha=0.25,                             # transparency
        zorder=zorder,                          # plotting order
    )

    for count, xpt, ypt in zip(counts, xs, ys):
        plt.text(
            xpt,
            ypt,
            str(count),
            color=color,
            size='small',
            horizontalalignment='center',
            verticalalignment='center',
            zorder=zorder + 1,
        )


# --- 1. Parse the access logs: {date of the log file: {address: count}} ---
all_dates = {}
for fname in os.listdir(DIR):
    if not fname.startswith('gootz-access'):
        continue
    # The date is the second dot-separated component of the file name.
    d = datetime.strptime(fname.split('.')[1], '%Y-%m-%d')
    all_dates[d] = _count_requests_by_ip(os.path.join(DIR, fname))

sorted_dates = sorted(all_dates)

# --- 2. Geolocate every address and bucket it by request count ---
gi = GeoIP.open('GeoLiteCity.dat', GeoIP.GEOIP_STANDARD)

insts_pos = {}
devs_pos = {}

for date in sorted_dates:
    for ip, count in all_dates[date].items():
        record = gi.record_by_name(ip)
        # NOTE(review): this truthiness test also skips a latitude of
        # exactly 0 -- confirm that is intended.
        if record and record['latitude']:
            position = (record['longitude'], record['latitude'])
            toinc = devs_pos if count > 5 else insts_pos
            toinc[position] = toinc.get(position, 0) + 1

# --- 3. Draw the map ---
# lon_0 is central longitude of robinson projection.
# resolution = 'c' means use crude resolution coastlines.
m = Basemap(projection='robin', lon_0=0, resolution='c')
# set a background colour
m.drawmapboundary(fill_color='#85A6D9')
# draw coastlines, country boundaries, fill continents.
m.fillcontinents(color='white', lake_color='#85A6D9')
m.drawcoastlines(color='#6D5F47', linewidth=.4)
m.drawcountries(color='#6D5F47', linewidth=.4)
# draw lat/lon grid lines every 30 degrees.
m.drawmeridians(np.arange(-180, 180, 30), color='#bbbbbb')
m.drawparallels(np.arange(-90, 90, 30), color='#bbbbbb')

_plot_counts(m, insts_pos, 'blue', 2)
_plot_counts(m, devs_pos, 'red', 4)

# add a title and display the map on screen
plt.title('From where Err is used.')
plt.show()
No comments:
Post a Comment