Verified Commit f9faab7b authored by Clemens Langer, committed by Martin Raifer

Add Exploring Localness Blogpost

parent c63d5ba6
@@ -13,6 +13,7 @@ folium==0.6.0
 html5lib==1.0.1
 idna==2.7
 ipykernel==4.8.2
+ipyleaflet==0.12.4
 ipython==6.5.0
 ipython-genutils==0.2.0
 ipywidgets==7.4.0
@@ -24,6 +25,7 @@ jupyter-client==5.2.3
 jupyter-console==5.2.0
 jupyter-core==4.4.0
 MarkupSafe==1.0
+matplotlib==3.0.3
 mistune==0.8.3
 munch==2.3.2
 nbconvert==5.3.1
@@ -38,6 +40,7 @@ plotly==3.1.1
 prometheus-client==0.3.1
 prompt-toolkit==1.0.15
 Pygments==2.2.0
+pyproj==2.4.2.post1
 python-dateutil==2.7.3
 pytz==2018.5
 pywinpty==0.5.4
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 13 10:28:46 2020
@author: Clemens Langer
"""
import requests
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import sys
import warnings
warnings.filterwarnings("ignore",category=matplotlib.cbook.mplDeprecation)
def get_time_stamp_list(date1,date2):
    '''returns a list of monthly timestamps between two dates'''
start_year = [int(i) for i in date1.split("T")[0].split("-")]
end_year = [int(i) for i in date2.split("T")[0].split("-")]
if end_year[0:2:] == start_year[0:2:]:
return ["{}-{:02d}-01T00:00:00".format(end_year[0],end_year[1])]
timestamps = []
for i in range(start_year[0],end_year[0]+1):
if i == start_year[0]:
start_month = start_year[1]
else:
start_month = 1
if i != end_year[0]:
end_month = 12
else:
end_month = end_year[1]
for o in range(start_month,end_month+1):
timestamps.append("{}-{:02d}-01T00:00:00".format(i,o))
return timestamps
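# Example (illustrative): get_time_stamp_list("2019-11-03T00:00:00", "2020-02-15T00:00:00") returns
# ["2019-11-01T00:00:00", "2019-12-01T00:00:00", "2020-01-01T00:00:00", "2020-02-01T00:00:00"]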
def plot_steps(ax,filter,location,BBOXes,c,len_filter,o,len_location,TIME, STEPS):
    '''plots, for a given filter and location, the share of objects carrying at least a certain number of tags (STEPS)'''
URL = "https://api.ohsome.org/v1"
LINE_STYLE = ["-","--","-.",":"]
COLORS = ["C0","C1","C2","C3"]
time = TIME.replace("/P1M","").replace("/",",")
data = {"time":time, "filter":filter, "bboxes":BBOXes[location],"properties":"tags"}
with requests.post(URL+"/elementsFullHistory/centroid",data) as response:
body = response.json()
df = pd.DataFrame()
tags = {}
if "features" in body:
for i, feature in enumerate(body["features"]):
sys.stdout.write("\r finished {} out of {} filter for {} out of {} locations | {} / {} Objects".format(o, len_filter,c +1,len_location,i,len(body["features"])))
sys.stdout.flush()
try:
timestamps = get_time_stamp_list(feature["properties"]['@validFrom'],feature["properties"]["@validTo"])
for timestamp in timestamps:
if timestamp not in tags:
tags[timestamp] = [len(feature["properties"])-3]
else:
tags[timestamp].append(len(feature["properties"])-3)
            except Exception:  # skip features whose validity timestamps are missing or cannot be parsed
print(feature)
continue
else:
print(body)
sys.exit(1)
max_count = 0
for ts, values in tags.items():
max_count = max(max_count, len(values))
for s ,step in enumerate(STEPS):
series = pd.Series({timestamp:len([i for i in tag_lens if i >= step])/len(tag_lens) for timestamp, tag_lens in tags.items()})
df["{}_{}_{}".format(location,filter,step)]= series
df.set_index(pd.to_datetime(df.index),inplace=True)
df["{}_{}_{}".format(location,filter,step)].plot(linestyle=LINE_STYLE[s],c=COLORS[c],ax=ax,legend=True,title=filter)
def level3(BBOXes,time):
    ''' plots the count of level-3 facilities (water, health, education, bridges/tunnels) for the given locations '''
COLORS = ["C0","C1","C2","C3"]
URL = "https://api.ohsome.org/v1"
level_3_tags = ["amenity=water_point","amenity=drinking_water","amenity=place_of_whorship","amenity=clinic","amenity=doctors","amenity=dentist","healthcare=*","amenity=school","amenity=college","amenity=university","amenity=kindergarden","amenity=childcare","bridge=*","tunnel=*"]
filter = " or ".join(level_3_tags)
fig, axs = plt.subplots(figsize=(15,7))
for c,(location, bbox) in enumerate(BBOXes.items()):
data = {"time":time, "filter":filter, "bboxes":bbox}
response = requests.post(URL+"/elements/count",data)
df = pd.DataFrame()
series = pd.Series({i["timestamp"]:i["value"] for i in response.json()["result"]})
df[location]= series
df.set_index(pd.to_datetime(df.index),inplace=True)
df[location].plot(c=COLORS[c],legend=location,title="Level 3 Count",ax=axs)
def pointsOfIntrest(BBOXes,time):
    ''' plots the count of named points of interest for the given locations '''
COLORS = ["C0","C1","C2","C3"]
URL = "https://api.ohsome.org/v1"
filter = "name=* and amenity=*"
fig, axs = plt.subplots(figsize=(15,7))
for c,(location, bbox) in enumerate(BBOXes.items()):
data = {"time":time, "filter":filter, "bboxes":bbox}
response = requests.post(URL+"/elements/count",data)
df = pd.DataFrame()
series = pd.Series({i["timestamp"]:i["value"] for i in response.json()["result"]})
df[location]= series
df.set_index(pd.to_datetime(df.index),inplace=True)
df[location].plot(c=COLORS[c],legend=location,title="Points of Intrest Count",ax=axs)
def geometry(BBOXes,TIME):
    ''' plots the development of the total road network length and the building count for the given locations '''
COLORS = ["C0","C1","C2","C3"]
URL = "https://api.ohsome.org/v1"
filter = "highway=*"
fig, axs = plt.subplots(2,figsize=(15,10))
fig.suptitle("Geometrical development")
for c,(location, bbox) in enumerate(BBOXes.items()):
data = {"time":TIME, "filter":filter, "bboxes":bbox}
response = requests.post(URL+"/elements/length",data)
df = pd.DataFrame()
series = pd.Series({i["timestamp"]:i["value"] for i in response.json()["result"]})
df[location]= series
df.set_index(pd.to_datetime(df.index),inplace=True)
df[location].plot(c=COLORS[c],legend=location,title="Total lenght of Roadnetwork [km]",ax=axs[0])
filter = "building=*"
for c,(location, bbox) in enumerate(BBOXes.items()):
data = {"time":TIME, "filter":filter, "bboxes":bbox}
response = requests.post(URL+"/elements/count",data)
df = pd.DataFrame()
series = pd.Series({i["timestamp"]:i["value"] for i in response.json()["result"]})
df[location]= series
df.set_index(pd.to_datetime(df.index),inplace=True)
df[location].plot(c=COLORS[c],legend=location,title="Total count of buildings",ax=axs[1])
def plotTagcompletness(BBOXes,FILTER,TIME,STEPS=[0,3,5,10]):
    '''plots the proportion of objects that carry at least a given number of tags (STEPS) for each filter and location'''
COLORS = ["C0","C1","C2","C3"]
fig, axs = plt.subplots(len(FILTER),figsize=(len(FILTER)*5,15))
fig.suptitle("Portion of objects containing a minimum number of Tags")
for c,location in enumerate(BBOXes.keys()):
for o, filter in enumerate(FILTER):
sys.stdout.write("\r finished {} out of {} filter for {} out of {} locations | waiting for response".format(0,len(FILTER),c+1,len(BBOXes)))
sys.stdout.flush()
plot_steps(axs[o],filter,location,BBOXes,c,len(FILTER),o,len(BBOXes),TIME,STEPS)
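
# --- Hypothetical usage sketch (not part of the original notebook; all values below are
# --- illustrative assumptions). It shows how the plotting helpers above could be driven with a
# --- dict of named bounding boxes ("lon_min,lat_min,lon_max,lat_max") and an ohsome time string
# --- with monthly snapshots. Note that plotTagcompletness queries the full history of every
# --- object and can take a long time for larger areas.
if __name__ == "__main__":
    EXAMPLE_BBOXES = {"Heidelberg": "8.625,49.379,8.733,49.437"}  # approximate bbox (assumption)
    EXAMPLE_TIME = "2012-01-01/2020-01-01/P1M"                    # monthly snapshots
    geometry(EXAMPLE_BBOXES, EXAMPLE_TIME)
    level3(EXAMPLE_BBOXES, EXAMPLE_TIME)
    pointsOfIntrest(EXAMPLE_BBOXES, EXAMPLE_TIME)
    plotTagcompletness(EXAMPLE_BBOXES, ["building=*", "highway=*"], EXAMPLE_TIME)
    plt.show()
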
"""
Created on Fri Nov 13 10:28:46 2020
@author: Clemens Langer
"""
import ipyleaflet
import ipywidgets as widgets
from branca.colormap import linear
import random
import requests
import numpy as np
import datetime
from math import sin, cos, sqrt, atan2, radians
from traitlets import Tuple
def get_distance(lon1, lat1, lon2, lat2):
'''returns the distance between 2 points in km'''
import pyproj
import math
from pyproj import Transformer
utm_band = str((math.floor((lon1 + 180) / 6 ) % 60) + 1)
if len(utm_band) == 1:
utm_band = '0'+utm_band
if lat1 >= 0:
epsg_code = '326' + utm_band
else:
epsg_code = '327' + utm_band
transformer = Transformer.from_crs("epsg:4326", 'epsg:{0}'.format(epsg_code))
x1, y1 =transformer.transform(lat1, lon1)
x2, y2 =transformer.transform(lat2, lon2)
return np.sqrt((x2-x1)**2 + (y2-y1)**2)/1000
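# Example (illustrative): for Heidelberg (lon ~ 8.67, lat ~ 49.41) the formula above yields
# UTM zone 32 on the northern hemisphere, i.e. "epsg:32632"; distances are measured in
# projected metres and divided by 1000 to return kilometres.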
def get_geojson_grid(BBOXes, n):
'''creates a grid with a given cell edge length in km containing one or multiple BBoxes'''
max_cells = 0
steps_dict = {}
for location, coord_string in BBOXes.items():
coord = [float(i) for i in coord_string.split(",")]
lower_left = coord[0:2]
upper_right = coord[2:4]
#calculate the minimum number of cells
dlat = get_distance(coord[0], coord[1], coord[0], coord[3])
dlon = get_distance(coord[0], coord[1], coord[2], coord[1])
flat = (n / dlat) * (int(dlat / n) + 1)
flon = (n / dlon) * (int(dlon / n) + 1)
lon_steps = np.linspace(
lower_left[0],
lower_left[0] + (upper_right[0] - lower_left[0]) * flon,
int(dlon / n) + 2,
)
lat_steps = np.linspace(
lower_left[1],
lower_left[1] + (upper_right[1] - lower_left[1]) * flat,
int(dlat / n) + 2,
)
steps_dict[location] = {"lat_steps": lat_steps, "lon_steps": lon_steps}
max_cells = max(len(lat_steps) * len(lon_steps), max_cells)
geo_json = {"type": "FeatureCollection", "features": []}
for base_id, (location, steps) in enumerate(steps_dict.items()):
lat_steps = steps["lat_steps"]
lon_steps = steps["lon_steps"]
lat_stride = lat_steps[1] - lat_steps[0]
lon_stride = lon_steps[1] - lon_steps[0]
for lat_id, lat in enumerate(lat_steps[:-1]):
for long_id, lon in enumerate(lon_steps[:-1]):
# Define dimensions of box in grid
upper_left = [lon, lat + lat_stride]
upper_right = [lon + lon_stride, lat + lat_stride]
lower_right = [lon + lon_stride, lat]
lower_left = [lon, lat]
if dlon > dlat:
id = (int(dlon / n) + 2) * long_id + lat_id
else:
id = (int(dlat / n) + 2) * lat_id + long_id
id = id + max_cells * base_id
# Define json coordinates for polygon
coordinates = [
upper_left[::],
upper_right[::],
lower_right[::],
lower_left[::],
upper_left[::],
]
grid_feature = {
"type": "Feature",
"id": "{}".format(id),
"properties": {
"location": location,
"lower_left": lower_left,
"upper_right": upper_right,
"bbox": "{},{},{},{}".format(
lower_left[0],
lower_left[1],
upper_right[0],
upper_right[1]),
},
"geometry": {
"type": "Polygon",
"coordinates": [coordinates],
},
}
geo_json["features"].append(grid_feature)
return geo_json
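# Example (illustrative values): build a grid of roughly 1 km wide cells over a single
# bounding box given as "lon_min,lat_min,lon_max,lat_max":
#   grid = get_geojson_grid({"Heidelberg": "8.625,49.379,8.733,49.437"}, n=1)
#   len(grid["features"])  # number of grid cells; each feature carries its own "bbox" property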
def get_time_stamp_list(date1, date2):
    '''returns a list of monthly timestamps between two dates'''
start_year = [int(i) for i in date1.split("T")[0].split("-")]
end_year = [int(i) for i in date2.split("T")[0].split("-")]
if end_year[0:2:] == start_year[0:2:]:
return ["{}-{:02d}-01T00:00:00".format(end_year[0], end_year[1])]
timestamps = []
for i in range(start_year[0], end_year[0] + 1):
if i == start_year[0]:
start_month = start_year[1]
else:
start_month = 1
if i != end_year[0]:
end_month = 12
else:
end_month = end_year[1]
for o in range(start_month, end_month + 1):
timestamps.append("{}-{:02d}-01T00:00:00".format(i, o))
return timestamps
def create_style_dict(response):
'''creates a style dict for choropleth map for a given ohsome response'''
style_dict = {}
body = response.json()
for result in body["groupByResult"]:
values = []
for i in result["result"]:
values.append(i["value"])
grid_id = result["groupByObject"]
for i in result["result"]:
timestamp = str(
datetime.datetime.strptime(
i["timestamp"].split("T")[0],
"%Y-%m-%d"))
if timestamp not in style_dict:
style_dict[timestamp] = {}
style_dict[timestamp][grid_id] = i["value"]
return style_dict
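# calculate_tag extends the style dict with keys "tags_<filter>" that map each monthly
# timestamp to {grid_id: average number of tags per object in that cell}; the "- 3" below
# apparently discounts the three @-metadata fields (@osmId, @validFrom, @validTo) that the
# full-history response returns alongside the actual tags.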
def calculate_tag(grid, TIME, FILTER, style_dict):
    '''calculates the average number of tags per object for the given filters and grid cells'''
URL = "https://api.ohsome.org/v1"
endpoint = "/elementsFullHistory/centroid"
time = TIME.replace("/P1M", "").replace("/", ",")
for filter in FILTER:
style_dict["tags_{}".format(filter)] = {}
for cell in grid["features"]:
grid_id = cell["id"]
params = {
"bboxes": cell["properties"]["bbox"],
"filter": filter,
"time": time,
"properties": "tags",
}
with requests.post(URL + endpoint, data=params) as response:
body = response.json()
tags = {}
if "features" in body:
for feature in body["features"]:
timestamps = get_time_stamp_list(
feature["properties"]["@validFrom"],
feature["properties"]["@validTo"],
)
for timestamp in timestamps:
if timestamp not in tags:
tags[timestamp] = [len(feature["properties"]) - 3]
else:
tags[timestamp].append(
len(feature["properties"]) - 3)
for timestamp in tags.keys():
if timestamp not in style_dict["tags_{}".format(filter)]:
style_dict["tags_{}".format(filter)][timestamp] = {
grid_id: sum(tags[timestamp]) / len(tags[timestamp])
}
else:
style_dict["tags_{}".format(filter)][timestamp][grid_id] = sum(
tags[timestamp]) / len(tags[timestamp])
grid_ids = [cell["id"] for cell in grid["features"]]
for timestamp, values in style_dict["tags_{}".format(filter)].items():
for grid_id in grid_ids:
if grid_id not in values:
values[grid_id] = 0
return style_dict
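# calculate_change adds a "delta<key>" entry for every existing layer, holding the difference
# of each cell's value to the previous monthly timestamp (0 for the first timestamp).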
def calculate_change(style_dict):
    '''calculates the difference in values between consecutive time intervals'''
temp_style_dict = {}
for filter in style_dict.keys():
dates = sorted([date for date in style_dict[filter].keys()])
temp_style_dict["delta" + filter] = {}
for i, timestamp in enumerate(dates):
temp_style_dict["delta" + filter][timestamp] = {}
for gridid in style_dict[filter][timestamp]:
if i == 0:
delta = 0
else:
delta = (
style_dict[filter][timestamp][gridid]
- style_dict[filter][dates[i - 1]][gridid]
)
temp_style_dict["delta" + filter][timestamp][gridid] = delta
for key, value in temp_style_dict.items():
style_dict[key] = value
return style_dict
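# Hypothetical usage from a notebook (illustrative values, not taken from the blog post):
#   cm = ChoroplethMap()
#   cm.getTimeChoroplethMap(BBOXes={"Tanabe": "135.70,33.60,136.00,33.80"},
#                           FILTER=["building=*"],
#                           TIME="2015-01-01/2020-01-01/P1M",
#                           size=1)
#   cm.saveJSON("style.json", "grid.json")  # optional: cache the queried data
#   cm.renderMap()
#   cm.m                                    # display the interactive ipyleaflet map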
class ChoroplethMap:
    '''ChoroplethMap class, which handles querying and rendering of the choropleth map for a given set of bounding boxes, filters and time range'''
def on_click(self, change):
'''handles changes for the layer selection'''
filter = change["new"]
self.layer.choro_data = self.all_style_dict[filter][str(
self.time_slider.value)]
self.color_range.max = max(
self.layer.choro_data.items(),
key=lambda k: k[1])[1]
def on_slide(self, change):
''' handles changes for the timeslider'''
self.layer.choro_data = self.all_style_dict[self.dropdown.value][change["new"]]
self.color_range.max = max(
self.layer.choro_data.items(),
key=lambda k: k[1])[1]
def on_selection(self, change):
'''handles changes for the color range selection'''
self.layer.value_min = change["new"][0]
self.layer.value_max = change["new"][1]
def zoom_out_to_target_bounds(self, change):
        ''' automatically zooms out to the minimal zoom level containing the target bounds'''
# the change owner is the widget triggering the handler, in this case a Map
# if we're not zoomed all the way out already, and we have a target...
if self.m.zoom > 1 and self.m.target_bounds:
b = self.m.target_bounds
n = change.new
if (
n[0][0] < b[0][0]
and n[0][1] < b[0][1]
and n[1][0] > b[1][0]
and n[1][1] > b[1][1]
):
# bounds are already large enough, so remove the target
self.m.target_bounds = None
else:
# zoom out
self.m.zoom = self.m.zoom - 1
def zoom_to_location(self, change):
'''handles changes in location selection'''
bbox = self.BBOXes[change["new"]]
coord = [float(i) for i in bbox.split(",")]
center = [sum(coord[1::2]) / 2, sum(coord[0::2]) / 2]
self.m.center = center
self.m.target_bounds = ((coord[1], coord[0]), (coord[3], coord[2]))
def getTimeChoroplethMap(
self,
BBOXes,
FILTER,
TIME,
delta=True,
tags=True,
size=1):
        '''creates the grid and queries all data needed for the choropleth map'''
URL = "https://api.ohsome.org/v1"
self.BBOXes = BBOXes
self.all_style_dict = {}
self.grid = get_geojson_grid(BBOXes, size)
endpoint = "/elements/count/groupBy/boundary"
for filter in FILTER:
params = {
"bboxes": "|".join(
[
"{}:{}".format(i["id"], i["properties"]["bbox"])
for i in self.grid["features"]
]
),
"filter": filter,
"time": TIME,
}
response = requests.post(URL + endpoint, data=params)
self.all_style_dict[filter] = create_style_dict(response)
if delta:
self.all_style_dict = calculate_change(self.all_style_dict)
if tags:
self.all_style_dict = calculate_tag(
self.grid, TIME, FILTER, self.all_style_dict
)
def loadJSON(self, path_style, path_grid, BBOXes):
import json
self.BBOXes = BBOXes
        with open(path_style) as in_style:
            self.all_style_dict = json.load(in_style)
        with open(path_grid) as in_grid:
            self.grid = json.load(in_grid)
def saveJSON(self, path_style, path_grid):
import json
with open(path_style,'w+') as out_style:
json.dump(self.all_style_dict, out_style)
with open(path_grid,'w+') as out_grid:
json.dump(self.grid, out_grid)
def renderMap(self):
'''creates ipyleaflet map'''
self.m = ipyleaflet.Map(
center=(
33.66832279243364,
135.8861750364304),
zoom=10)
# self.grid
options = []
options.extend(list(self.all_style_dict.keys()))
option = options[0]
dates = sorted([date for date in self.all_style_dict[option].keys()])
self.layer = ipyleaflet.Choropleth(
geo_data=self.grid,
choro_data=self.all_style_dict[option][str(dates[0])],
colormap=linear.YlOrRd_04,
style={"fillOpacity": 0.8, "dashArray": "5, 5"},
)
self.m.add_layer(self.layer)
self.time_slider = widgets.SelectionSlider(
options=dates,
value=dates[0],
description="TimeStamp",
disabled=False,
continuous_update=False,
orientation="horizontal",
readout=True,
)
self.time_slider.observe(self.on_slide, "value")
widget_control_slider = ipyleaflet.WidgetControl(
widget=self.time_slider, position="bottomright"
)
self.m.add_control(widget_control_slider)
# widgets.interact(update_map_time, timeStamp = self.time_slider)
self.dropdown = widgets.Dropdown(
options=options, value=option, description="Select layer"
)
self.dropdown.observe(self.on_click, "value")
widget_control = ipyleaflet.WidgetControl(
widget=self.dropdown, position="topright"
)
self.m.add_control(widget_control)
self.color_range = widgets.IntRangeSlider(
value=[self.layer.value_min, self.layer.value_max],
min=self.layer.value_min,
max=self.layer.value_max,
step=1,
description="ColorBar:",
disabled=False,
continuous_update=False,
orientation="horizontal",