import pandas as pd
tdf = pd.read_csv('City_of_Pittsburgh_Trees.csv', nrows=10000)
len(tdf)
10000
tdf.head(5)
| | _id | id | address_number | street | common_name | scientific_name | height | width | growth_space_length | growth_space_width | ... | neighborhood | council_district | ward | tract | public_works_division | pli_division | police_zone | fire_zone | latitude | longitude |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 754166088 | 7428.0 | MONTICELLO ST | Stump | Stump | 0.0 | 0.0 | 10.0 | 2.0 | ... | Homewood North | 9.0 | 13.0 | 42003130200 | 2 | 13.0 | 5 | 3-17 | 40.458169 | -79.889724 |
| 1 | 2 | 1946899269 | 220.0 | BALVER AVE | Linden: Littleleaf | Tilia cordata | 0.0 | 0.0 | 99.0 | 99.0 | ... | Oakwood | 2.0 | 28.0 | 42003562800 | 5 | 28.0 | 6 | 1-19 | 40.429269 | -80.067868 |
| 2 | 3 | 1431517397 | 2822.0 | SIDNEY ST | Maple: Red | Acer rubrum | 22.0 | 6.0 | 6.0 | 3.0 | ... | South Side Flats | 3.0 | 16.0 | 42003160900 | 3 | 16.0 | 3 | 4-24 | 40.426797 | -79.965035 |
| 3 | 4 | 994063598 | 608.0 | SUISMON ST | Maple: Freeman | Acer x freemanii | 25.0 | 10.0 | 3.0 | 3.0 | ... | East Allegheny | 1.0 | 23.0 | 42003563200 | 1 | 23.0 | 1 | 1-6 | 40.455503 | -79.999276 |
| 4 | 5 | 1591838573 | 1135.0 | N NEGLEY AVE | Maple: Norway | Acer platanoides | 52.0 | 13.0 | 99.0 | 99.0 | ... | Highland Park | 7.0 | 11.0 | 42003110200 | 2 | 11.0 | 5 | 3-9 | 40.476667 | -79.924106 |
5 rows × 59 columns
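# Before counting categories, a quick check for missing values can help; this
# is a minimal sketch against the same tdf loaded above, using columns that
# appear in the head() output.
tdf[['common_name', 'condition', 'land_use', 'latitude', 'longitude']].isna().sum()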
# Summarize the categorical columns with simple frequency counts
from collections import Counter
Counter(tdf.condition).most_common()
[('Good', 4025), ('Fair', 3643), ('Poor', 955), (nan, 891), ('Critical', 263), ('Dead', 146), ('Very Good', 75), ('Excellent', 2)]
#Counter(tdf.common_name).most_common()
Counter(tdf.land_use).most_common()
[('Residential', 6673), ('Commercial/Industrial', 1515), ('Vacant', 682), ('Multi-family Residential', 487), ('Institutional', 367), ('Park', 233), ('Transportation', 22), ('Cemetery', 8), ('Agriculture', 6), ('Golf Course', 5), ('Utility', 1), ('Other', 1)]
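# The same tallies can come straight from pandas; value_counts(dropna=False)
# also reports the NaN bucket that Counter shows above. A sketch on the same tdf.
tdf['condition'].value_counts(dropna=False)
tdf['land_use'].value_counts(dropna=False)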
from shapely.geometry import Point
import geopandas as gpd
geometry = [Point(xy) for xy in zip(tdf['longitude'], tdf['latitude'])]
geometry[0]
gdf = gpd.GeoDataFrame(tdf, geometry=geometry)[['_id', 'id', 'address_number', 'condition', 'land_use', 'geometry']]
gdf.head()
| | _id | id | address_number | condition | land_use | geometry |
|---|---|---|---|---|---|---|
| 0 | 1 | 754166088 | 7428.0 | NaN | Vacant | POINT (-79.88972 40.45817) |
| 1 | 2 | 1946899269 | 220.0 | NaN | Residential | POINT (-80.06787 40.42927) |
| 2 | 3 | 1431517397 | 2822.0 | Fair | Commercial/Industrial | POINT (-79.96503 40.42680) |
| 3 | 4 | 994063598 | 608.0 | Fair | Residential | POINT (-79.99928 40.45550) |
| 4 | 5 | 1591838573 | 1135.0 | Good | Residential | POINT (-79.92411 40.47667) |
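# An equivalent construction using geopandas' points_from_xy helper with an
# explicit WGS84 CRS; a sketch assuming the columns are plain lon/lat degrees
# (gdf_alt is just an illustrative name).
gdf_alt = gpd.GeoDataFrame(
    tdf[['_id', 'id', 'address_number', 'condition', 'land_use']],
    geometry=gpd.points_from_xy(tdf['longitude'], tdf['latitude']),
    crs='EPSG:4326',
)
gdf_alt.head()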
import matplotlib.pyplot as plt
f, ax = plt.subplots(1, 1, figsize=(10, 10))
gdf.plot(ax=ax, column='condition', legend=True, markersize=5, alpha=0.4)
<AxesSubplot: >
!pip install mapclassify
Successfully installed mapclassify-2.8.1 networkx-3.4.2
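# mapclassify is what enables the `scheme` argument in geopandas plots, which
# bins a numeric column into classes. A sketch using the height column from
# tdf and the geometry list built earlier; the Quantiles scheme and k=5 are
# illustrative choices, not part of the original analysis.
height_gdf = gpd.GeoDataFrame(tdf[['height']], geometry=geometry)
f, ax = plt.subplots(1, 1, figsize=(10, 10))
height_gdf.plot(ax=ax, column='height', scheme='Quantiles', k=5,
                legend=True, markersize=5, alpha=0.4)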
import matplotlib.pyplot as plt
import geopandas as gpd
# Creating the figure and axis
fig, ax = plt.subplots(1, 1, figsize=(12, 10))
# Plotting the GeoDataFrame with a darker color scheme
gdf.plot(
ax=ax,
column='condition',
cmap='viridis',
legend=True,
markersize=12,
alpha=0.8)
# Copyright/attribution note
ax.text(
0.95, 0.1,
"© 2025 Pallabi Bhattacharya",
fontsize=12,
color='black',
transform=fig.transFigure,
ha='right',
alpha=0.9 )
# Adding title for clarity
ax.set_title("Tree Health Classification in Pittsburgh City", fontsize=14)
# Adding axis labels
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
plt.show()
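# Saving the styled figure to disk for reuse; a minimal sketch, where the
# filename and dpi are illustrative only.
fig.savefig('pittsburgh_tree_condition.png', dpi=200, bbox_inches='tight')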
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import box
# Loading a world basemap (note: gpd.datasets is deprecated and will be removed
# in GeoPandas 1.0; see the FutureWarning below for the upstream download link)
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
# Rough bounding box around Pittsburgh (min lon, min lat, max lon, max lat)
pittsburgh_bounds = box(-80.1, 40.3, -79.8, 40.5)
# Filtering data using spatial intersection
pittsburgh_gdf = gdf[gdf.intersects(pittsburgh_bounds)]
# Creating figure and axis
fig, ax = plt.subplots(1, 1, figsize=(12, 10))
# Plotting the world basemap (light gray for better contrast)
world.plot(ax=ax, color='lightgray', edgecolor='black', alpha=0.5)
# Plotting Pittsburgh tree condition data
pittsburgh_gdf.plot(
ax=ax,
column='condition',
cmap='plasma',
markersize=12,
alpha=0.8
)
# Setting axis limits to focus on Pittsburgh
ax.set_xlim([-80.1, -79.8])
ax.set_ylim([40.3, 40.5])
# Adding title
ax.set_title("Tree Density by Condition in Pittsburgh City", fontsize=14)
# Copyright/attribution note
ax.text(
0.95, 0.02,
"© 2025 Pallabi Bhattacharya",
fontsize=12,
color='black',
transform=fig.transFigure,
ha='right',
alpha=0.8
)
plt.show()
/var/folders/tn/qthxn6q95rd17mg5gsz5ctjh0000gq/T/ipykernel_27720/465681576.py:6: FutureWarning: The geopandas.dataset module is deprecated and will be removed in GeoPandas 1.0. You can get the original 'naturalearth_lowres' data from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/.
  world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
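# geopandas also offers the .cx coordinate indexer, which slices by bounding
# box directly and is roughly equivalent to the intersects() filter above for
# point data; a minimal sketch on the same gdf and bounds.
pittsburgh_cx = gdf.cx[-80.1:-79.8, 40.3:40.5]
len(pittsburgh_cx)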