Turning Tabular Data into Geospatial Data: City of Pittsburgh Trees¶

In [2]:
import pandas as pd
In [3]:
tdf = pd.read_csv('City_of_Pittsburgh_Trees.csv', nrows=10000)
In [4]:
len(tdf)
Out[4]:
10000
In [6]:
tdf.head(5)
Out[6]:
_id id address_number street common_name scientific_name height width growth_space_length growth_space_width ... neighborhood council_district ward tract public_works_division pli_division police_zone fire_zone latitude longitude
0 1 754166088 7428.0 MONTICELLO ST Stump Stump 0.0 0.0 10.0 2.0 ... Homewood North 9.0 13.0 42003130200 2 13.0 5 3-17 40.458169 -79.889724
1 2 1946899269 220.0 BALVER AVE Linden: Littleleaf Tilia cordata 0.0 0.0 99.0 99.0 ... Oakwood 2.0 28.0 42003562800 5 28.0 6 1-19 40.429269 -80.067868
2 3 1431517397 2822.0 SIDNEY ST Maple: Red Acer rubrum 22.0 6.0 6.0 3.0 ... South Side Flats 3.0 16.0 42003160900 3 16.0 3 4-24 40.426797 -79.965035
3 4 994063598 608.0 SUISMON ST Maple: Freeman Acer x freemanii 25.0 10.0 3.0 3.0 ... East Allegheny 1.0 23.0 42003563200 1 23.0 1 1-6 40.455503 -79.999276
4 5 1591838573 1135.0 N NEGLEY AVE Maple: Norway Acer platanoides 52.0 13.0 99.0 99.0 ... Highland Park 7.0 11.0 42003110200 2 11.0 5 3-9 40.476667 -79.924106

5 rows × 59 columns
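Before going spatial, it is worth confirming that the columns used later in this notebook are present and checking how many values are missing. A minimal sketch, with the column names taken from the table above:

# Count missing values in the columns used later in the notebook
cols = ['condition', 'land_use', 'latitude', 'longitude']
tdf[cols].isna().sum()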

In [7]:
# Now do some simple frequency counts on the categorical columns

from collections import Counter
In [13]:
Counter(tdf.condition).most_common()
Out[13]:
[('Good', 4025),
 ('Fair', 3643),
 ('Poor', 955),
 (nan, 891),
 ('Critical', 263),
 ('Dead', 146),
 ('Very Good', 75),
 ('Excellent', 2)]
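The same frequency table can be produced with pandas alone; a small sketch using value_counts with dropna=False, so the missing-condition count stays visible:

# Equivalent of Counter(...).most_common(), kept as a pandas Series
tdf['condition'].value_counts(dropna=False)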
In [15]:
#Counter(tdf.common_name).most_common()
In [21]:
Counter(tdf.land_use).most_common()
Out[21]:
[('Residential', 6673),
 ('Commercial/Industrial', 1515),
 ('Vacant', 682),
 ('Multi-family Residential', 487),
 ('Institutional', 367),
 ('Park', 233),
 ('Transportation', 22),
 ('Cemetery', 8),
 ('Agriculture', 6),
 ('Golf Course', 5),
 ('Utility', 1),
 ('Other', 1)]
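To see how condition varies across land-use categories, a cross-tabulation is a natural follow-up; a sketch (not run here) using pd.crosstab with row-wise proportions:

# Share of each condition class within every land-use category
pd.crosstab(tdf['land_use'], tdf['condition'], normalize='index').round(2)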
In [18]:
from shapely.geometry import Point
In [19]:
import geopandas as gpd
In [20]:
# Build shapely Point geometries from (longitude, latitude) pairs
geometry = [Point(xy) for xy in zip(tdf['longitude'], tdf['latitude'])]
geometry[0]
Out[20]:
[shapely renders the first Point inline as a small SVG marker]
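One easy mistake here is the coordinate order: shapely's Point takes (x, y), which for geographic data means (longitude, latitude). A quick sanity check on the first point (sketch):

# x should be the longitude (about -79.89), y the latitude (about 40.46)
p = geometry[0]
print(p.x, p.y)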
In [22]:
# Build a GeoDataFrame and keep only the columns needed for mapping
gdf = gpd.GeoDataFrame(tdf, geometry=geometry)[['_id', 'id', 'address_number', 'condition', 'land_use', 'geometry']]
In [23]:
gdf.head()
Out[23]:
_id id address_number condition land_use geometry
0 1 754166088 7428.0 NaN Vacant POINT (-79.88972 40.45817)
1 2 1946899269 220.0 NaN Residential POINT (-80.06787 40.42927)
2 3 1431517397 2822.0 Fair Commercial/Industrial POINT (-79.96503 40.42680)
3 4 994063598 608.0 Fair Residential POINT (-79.99928 40.45550)
4 5 1591838573 1135.0 Good Residential POINT (-79.92411 40.47667)
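The GeoDataFrame above was built without a coordinate reference system. Since the latitude/longitude columns in this dataset are plain WGS84, it is worth tagging the frame explicitly so later overlays and spatial operations know what the numbers mean; a minimal sketch, assuming EPSG:4326 is the right CRS for this data:

# Tag the point geometries as WGS84 (EPSG:4326)
gdf = gdf.set_crs(epsg=4326)
gdf.crs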
In [27]:
import matplotlib.pyplot as plt

# Quick look at the tree points, coloured by condition
f, ax = plt.subplots(1, 1, figsize=(10, 10))
gdf.plot(ax=ax, column='condition', legend=True, markersize=5, alpha=0.4)
Out[27]:
<AxesSubplot: >
In [30]:
!pip install mapclassify
Collecting mapclassify
  Downloading mapclassify-2.8.1-py3-none-any.whl.metadata (2.8 kB)
Collecting networkx>=2.7 (from mapclassify)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Requirement already satisfied: numpy>=1.23 in /usr/local/lib/python3.10/site-packages (from mapclassify) (1.24.2)
Requirement already satisfied: pandas!=1.5.0,>=1.4 in /usr/local/lib/python3.10/site-packages (from mapclassify) (2.1.4)
Requirement already satisfied: scikit-learn>=1.0 in /usr/local/lib/python3.10/site-packages (from mapclassify) (1.2.0)
Requirement already satisfied: scipy>=1.8 in /usr/local/lib/python3.10/site-packages (from mapclassify) (1.10.0)
Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/site-packages (from pandas!=1.5.0,>=1.4->mapclassify) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/site-packages (from pandas!=1.5.0,>=1.4->mapclassify) (2022.7)
Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/site-packages (from pandas!=1.5.0,>=1.4->mapclassify) (2024.2)
Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/site-packages (from scikit-learn>=1.0->mapclassify) (1.2.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/site-packages (from scikit-learn>=1.0->mapclassify) (3.1.0)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas!=1.5.0,>=1.4->mapclassify) (1.17.0)
Downloading mapclassify-2.8.1-py3-none-any.whl (59 kB)
Downloading networkx-3.4.2-py3-none-any.whl (1.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 17.7 MB/s eta 0:00:00
Installing collected packages: networkx, mapclassify
Successfully installed mapclassify-2.8.1 networkx-3.4.2
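mapclassify is what lets GeoPandas bin a numeric column into classes via the scheme= argument of .plot(). A minimal sketch, assuming we bring the numeric height column back from tdf (it was dropped when gdf was trimmed to a few columns):

# Colour points by tree height, binned into 5 quantile classes
gdf_h = gdf.assign(height=tdf['height'])
gdf_h.plot(column='height', scheme='Quantiles', k=5, legend=True, markersize=5)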

Let's create something cool¶

In [69]:
import matplotlib.pyplot as plt
import geopandas as gpd

# Create the figure and axis
fig, ax = plt.subplots(1, 1, figsize=(12, 10))

# Plotting the GeoDataFrame, coloured by tree condition (viridis colormap)
gdf.plot(
    ax=ax, 
    column='condition', 
    cmap='viridis',  
    legend=True, 
    markersize=12, 
    alpha=0.8)

# My copyright notice
ax.text(
    0.95, 0.1,  
    "© 2025 Pallabi Bhattacharya",  
    fontsize=12, 
    color='black', 
    transform=fig.transFigure,  
    ha='right',  
    alpha=0.9  )

# Adding title for clarity
ax.set_title("Tree Health Classification in Pittsburgh City", fontsize=14)

# Showing axis labels
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")

plt.show()
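By default, rows with a missing condition are left out of the coloured layer. GeoPandas' plot accepts a missing_kwds dictionary to draw them explicitly; a sketch of the same plot with the unrecorded trees shown in grey:

# Same plot, but trees with no recorded condition appear as grey points
gdf.plot(
    column='condition',
    cmap='viridis',
    legend=True,
    markersize=12,
    alpha=0.8,
    missing_kwds={'color': 'lightgrey', 'label': 'No condition recorded'},
)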
In [65]:
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import box

# Loading world basemap
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))


# Bounding box (minx, miny, maxx, maxy) roughly covering Pittsburgh
pittsburgh_bounds = box(-80.1, 40.3, -79.8, 40.5)

# Filtering data using spatial intersection
pittsburgh_gdf = gdf[gdf.intersects(pittsburgh_bounds)]

# Creating figure and axis
fig, ax = plt.subplots(1, 1, figsize=(12, 10))

# Plotting the world basemap (light gray for better contrast)
world.plot(ax=ax, color='lightgray', edgecolor='black', alpha=0.5)

# Plotting Pittsburgh tree condition data
pittsburgh_gdf.plot(
    ax=ax, 
    column='condition', 
    cmap='plasma',  
    markersize=12,  
    alpha=0.8  
)

# Setting limits to focus on Pittsburgh
ax.set_xlim([-80.1, -79.8])
ax.set_ylim([40.3, 40.5])

# Adding title
ax.set_title("Tree Density by Condition in Pittsburgh City", fontsize=14)

# My copyright notice
ax.text(
    0.95, 0.02,  
    "© 2025 Pallabi Bhattacharya",  
    fontsize=12, 
    color='black', 
    transform=fig.transFigure,  
    ha='right',  
    alpha=0.8  
)

plt.show()
/var/folders/tn/qthxn6q95rd17mg5gsz5ctjh0000gq/T/ipykernel_27720/465681576.py:6: FutureWarning: The geopandas.dataset module is deprecated and will be removed in GeoPandas 1.0. You can get the original 'naturalearth_lowres' data from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/.
  world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
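The FutureWarning above is because the gpd.datasets module was removed in GeoPandas 1.0. One replacement is to download the 110m countries layer from naturalearthdata.com and read the zip directly; a sketch, where the filename is whatever the download was saved as:

# Read the Natural Earth 110m admin-0 countries layer from a local zip
# downloaded from naturalearthdata.com (path is illustrative)
world = gpd.read_file("ne_110m_admin_0_countries.zip")

The bounding-box filter in the cell above could also be written with the coordinate indexer, e.g. gdf.cx[-80.1:-79.8, 40.3:40.5], which selects by bounds without building a shapely box.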