Turning Tabular Data into Geospatial Data: City of Pittsburgh Trees¶

In [2]:
import pandas as pd
In [3]:
tdf = pd.read_csv('City_of_Pittsburgh_Trees.csv', nrows=10000)
In [4]:
len(tdf)
Out[4]:
10000
In [6]:
tdf.head(5)
Out[6]:
_id id address_number street common_name scientific_name height width growth_space_length growth_space_width ... neighborhood council_district ward tract public_works_division pli_division police_zone fire_zone latitude longitude
0 1 754166088 7428.0 MONTICELLO ST Stump Stump 0.0 0.0 10.0 2.0 ... Homewood North 9.0 13.0 42003130200 2 13.0 5 3-17 40.458169 -79.889724
1 2 1946899269 220.0 BALVER AVE Linden: Littleleaf Tilia cordata 0.0 0.0 99.0 99.0 ... Oakwood 2.0 28.0 42003562800 5 28.0 6 1-19 40.429269 -80.067868
2 3 1431517397 2822.0 SIDNEY ST Maple: Red Acer rubrum 22.0 6.0 6.0 3.0 ... South Side Flats 3.0 16.0 42003160900 3 16.0 3 4-24 40.426797 -79.965035
3 4 994063598 608.0 SUISMON ST Maple: Freeman Acer x freemanii 25.0 10.0 3.0 3.0 ... East Allegheny 1.0 23.0 42003563200 1 23.0 1 1-6 40.455503 -79.999276
4 5 1591838573 1135.0 N NEGLEY AVE Maple: Norway Acer platanoides 52.0 13.0 99.0 99.0 ... Highland Park 7.0 11.0 42003110200 2 11.0 5 3-9 40.476667 -79.924106

5 rows × 59 columns
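Before going spatial, it is worth confirming that the columns used later in this notebook are present and checking how many values are missing. A minimal sketch, with the column names taken from the table above:

# Count missing values in the columns used later in the notebook
cols = ['condition', 'land_use', 'latitude', 'longitude']
tdf[cols].isna().sum()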

In [7]:
# Now do some simple frequency counts on the categorical columns

from collections import Counter
In [13]:
Counter(tdf.condition).most_common()
Out[13]:
[('Good', 4025),
 ('Fair', 3643),
 ('Poor', 955),
 (nan, 891),
 ('Critical', 263),
 ('Dead', 146),
 ('Very Good', 75),
 ('Excellent', 2)]
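The same frequency table can be produced with pandas alone; a small sketch using value_counts with dropna=False, so the missing-condition count stays visible:

# Equivalent of Counter(...).most_common(), kept as a pandas Series
tdf['condition'].value_counts(dropna=False)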
In [15]:
#Counter(tdf.common_name).most_common()
In [21]:
Counter(tdf.land_use).most_common()
Out[21]:
[('Residential', 6673),
 ('Commercial/Industrial', 1515),
 ('Vacant', 682),
 ('Multi-family Residential', 487),
 ('Institutional', 367),
 ('Park', 233),
 ('Transportation', 22),
 ('Cemetery', 8),
 ('Agriculture', 6),
 ('Golf Course', 5),
 ('Utility', 1),
 ('Other', 1)]
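To see how condition varies across land-use categories, a cross-tabulation is a natural follow-up; a sketch (not run here) using pd.crosstab with row-wise proportions:

# Share of each condition class within every land-use category
pd.crosstab(tdf['land_use'], tdf['condition'], normalize='index').round(2)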
In [18]:
from shapely.geometry import Point
In [19]:
import geopandas as gpd
In [20]:
# Build shapely Point geometries from (longitude, latitude) pairs
geometry = [Point(xy) for xy in zip(tdf['longitude'], tdf['latitude'])]
geometry[0]
Out[20]:
[shapely renders the first Point inline as a small SVG marker]
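One easy mistake here is the coordinate order: shapely's Point takes (x, y), which for geographic data means (longitude, latitude). A quick sanity check on the first point (sketch):

# x should be the longitude (about -79.89), y the latitude (about 40.46)
p = geometry[0]
print(p.x, p.y)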
In [22]:
# Build a GeoDataFrame and keep only the columns needed for mapping
gdf = gpd.GeoDataFrame(tdf, geometry=geometry)[['_id', 'id', 'address_number', 'condition', 'land_use', 'geometry']]
In [23]:
gdf.head()
Out[23]:
_id id address_number condition land_use geometry
0 1 754166088 7428.0 NaN Vacant POINT (-79.88972 40.45817)
1 2 1946899269 220.0 NaN Residential POINT (-80.06787 40.42927)
2 3 1431517397 2822.0 Fair Commercial/Industrial POINT (-79.96503 40.42680)
3 4 994063598 608.0 Fair Residential POINT (-79.99928 40.45550)
4 5 1591838573 1135.0 Good Residential POINT (-79.92411 40.47667)
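The GeoDataFrame above was built without a coordinate reference system. Since the latitude/longitude columns in this dataset are plain WGS84, it is worth tagging the frame explicitly so later overlays and spatial operations know what the numbers mean; a minimal sketch, assuming EPSG:4326 is the right CRS for this data:

# Tag the point geometries as WGS84 (EPSG:4326)
gdf = gdf.set_crs(epsg=4326)
gdf.crs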
In [27]:
import matplotlib.pyplot as plt

# Quick look at the tree points, coloured by condition
f, ax = plt.subplots(1, 1, figsize=(10, 10))
gdf.plot(ax=ax, column='condition', legend=True, markersize=5, alpha=0.4)
Out[27]:
<AxesSubplot: >
In [30]:
!pip install mapclassify
Collecting mapclassify
  Downloading mapclassify-2.8.1-py3-none-any.whl.metadata (2.8 kB)
Collecting networkx>=2.7 (from mapclassify)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Requirement already satisfied: numpy>=1.23 in /usr/local/lib/python3.10/site-packages (from mapclassify) (1.24.2)
Requirement already satisfied: pandas!=1.5.0,>=1.4 in /usr/local/lib/python3.10/site-packages (from mapclassify) (2.1.4)
Requirement already satisfied: scikit-learn>=1.0 in /usr/local/lib/python3.10/site-packages (from mapclassify) (1.2.0)
Requirement already satisfied: scipy>=1.8 in /usr/local/lib/python3.10/site-packages (from mapclassify) (1.10.0)
Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/site-packages (from pandas!=1.5.0,>=1.4->mapclassify) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/site-packages (from pandas!=1.5.0,>=1.4->mapclassify) (2022.7)
Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/site-packages (from pandas!=1.5.0,>=1.4->mapclassify) (2024.2)
Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/site-packages (from scikit-learn>=1.0->mapclassify) (1.2.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/site-packages (from scikit-learn>=1.0->mapclassify) (3.1.0)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas!=1.5.0,>=1.4->mapclassify) (1.17.0)
Downloading mapclassify-2.8.1-py3-none-any.whl (59 kB)
Downloading networkx-3.4.2-py3-none-any.whl (1.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 17.7 MB/s eta 0:00:00
Installing collected packages: networkx, mapclassify
Successfully installed mapclassify-2.8.1 networkx-3.4.2
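mapclassify is what lets GeoPandas bin a numeric column into classes via the scheme= argument of .plot(). A minimal sketch, assuming we bring the numeric height column back from tdf (it was dropped when gdf was trimmed to a few columns):

# Colour points by tree height, binned into 5 quantile classes
gdf_h = gdf.assign(height=tdf['height'])
gdf_h.plot(column='height', scheme='Quantiles', k=5, legend=True, markersize=5)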

Let's create something cool¶

In [69]:
import matplotlib.pyplot as plt
import geopandas as gpd

# Create the figure and axis
fig, ax = plt.subplots(1, 1, figsize=(12, 10))

# Plotting the GeoDataFrame, coloured by tree condition (viridis colormap)
gdf.plot(
    ax=ax, 
    column='condition', 
    cmap='viridis',  
    legend=True, 
    markersize=12, 
    alpha=0.8)

# My copyright notice
ax.text(
    0.95, 0.1,  
    "© 2025 Pallabi Bhattacharya",  
    fontsize=12, 
    color='black', 
    transform=fig.transFigure,  
    ha='right',  
    alpha=0.9  )

# Adding title for clarity
ax.set_title("Tree Health Classification in Pittsburgh City", fontsize=14)

# Showing axis labels
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")

plt.show()
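By default, rows with a missing condition are left out of the coloured layer. GeoPandas' plot accepts a missing_kwds dictionary to draw them explicitly; a sketch of the same plot with the unrecorded trees shown in grey:

# Same plot, but trees with no recorded condition appear as grey points
gdf.plot(
    column='condition',
    cmap='viridis',
    legend=True,
    markersize=12,
    alpha=0.8,
    missing_kwds={'color': 'lightgrey', 'label': 'No condition recorded'},
)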
In [65]:
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import box

# Loading world basemap
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))


# Bounding box (minx, miny, maxx, maxy) roughly covering Pittsburgh
pittsburgh_bounds = box(-80.1, 40.3, -79.8, 40.5)

# Filtering data using spatial intersection
pittsburgh_gdf = gdf[gdf.intersects(pittsburgh_bounds)]

# Creating figure and axis
fig, ax = plt.subplots(1, 1, figsize=(12, 10))

# Plotting the world basemap (light gray for better contrast)
world.plot(ax=ax, color='lightgray', edgecolor='black', alpha=0.5)

# Plotting Pittsburgh tree condition data
pittsburgh_gdf.plot(
    ax=ax, 
    column='condition', 
    cmap='plasma',  
    markersize=12,  
    alpha=0.8  
)

# Setting limits to focus on Pittsburgh
ax.set_xlim([-80.1, -79.8])
ax.set_ylim([40.3, 40.5])

# Adding title
ax.set_title("Tree Density by Condition in Pittsburgh City", fontsize=14)

# My copyright notice
ax.text(
    0.95, 0.02,  
    "© 2025 Pallabi Bhattacharya",  
    fontsize=12, 
    color='black', 
    transform=fig.transFigure,  
    ha='right',  
    alpha=0.8  
)

plt.show()
/var/folders/tn/qthxn6q95rd17mg5gsz5ctjh0000gq/T/ipykernel_27720/465681576.py:6: FutureWarning: The geopandas.dataset module is deprecated and will be removed in GeoPandas 1.0. You can get the original 'naturalearth_lowres' data from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/.
  world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
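The FutureWarning above is because the gpd.datasets module was removed in GeoPandas 1.0. One replacement is to download the 110m countries layer from naturalearthdata.com and read the zip directly; a sketch, where the filename is whatever the download was saved as:

# Read the Natural Earth 110m admin-0 countries layer from a local zip
# downloaded from naturalearthdata.com (path is illustrative)
world = gpd.read_file("ne_110m_admin_0_countries.zip")

The bounding-box filter in the cell above could also be written with the coordinate indexer, e.g. gdf.cx[-80.1:-79.8, 40.3:40.5], which selects by bounds without building a shapely box.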