#!/usr/bin/python3
import csv
import io
import json
import zipfile
import urllib.request

shapes = {}

# https://stackoverflow.com/a/5711095
resp = urllib.request.urlopen("https://www.viarail.ca/sites/all/files/gtfs/viarail.zip")
with zipfile.ZipFile(io.BytesIO(resp.read())).open('shapes.txt') as f:
    reader = csv.DictReader(io.TextIOWrapper(f))
    for row in reader:
        # The points look like they're probably all in order, but the spec
        # doesn't actually say that they have to be, so we're going to bucket
        # them by shape_id and then sort each bucket's contents to be on the
        # safe side. It's not that much data, and I don't run this download
        # frequently, so the extra CPU cost shouldn't be too outrageous :)
        if row['shape_id'] not in shapes:
            shapes[row['shape_id']] = []
        shapes[row['shape_id']].append(row)

# Build one GeoJSON LineString feature per shape, with points ordered by
# shape_pt_sequence.
geojson = {
    "type": "FeatureCollection",
    "features": [],
}
for _, shape in shapes.items():
    shape.sort(key=lambda c: int(c['shape_pt_sequence']))
    geojson['features'].append({
        "type": "Feature",
        "geometry": {
            "type": "LineString",
            "coordinates": [
                [float(l['shape_pt_lon']), float(l['shape_pt_lat'])]
                for l in shape
            ]
        },
    })

with open('data.geojson', 'w') as f:
    f.write(json.dumps(geojson))