app completed

shivkapadia · Dec 14, 2022 · cd0440c · cd0440c
commit cd0440c
Show file tree

Hide file tree

Showing 11 changed files with 29,853 additions and 0 deletions.
diff --git a/Home.py b/Home.py
@@ -0,0 +1,16 @@
+import streamlit as st
+import pandas as pd
+import numpy as np
+
+
+st.set_page_config(
+    page_title="NYC Parking Ticket Data Visualizer",
+    page_icon="🚗",
+)
+
+st.header("🚗 NYC Parking Ticket Data Visualizer! 🚗")
+
+st.image('images/nyc_map_image.jpg')
+
+
+st.write('The motivation behind this project stems from us challenging ourselves to help NYC residents understand the frequency of violations in the most optimal way possible. Rather than reading a blog with paragraphs, why not allow users to understand the same amount of information by viewing a few graphs. We are creating data visualizations of NYC Parking Tickets data to analyze and draw conclusions. We want to see if there are any patterns we can identify in this data. The data we are using is around 30,000 rows of tickets that includes information such as Registration State, Violation Code, Violation Time, etc. Navigate to the Visualizations page to interact with the different models we made. Navigate tot he License page to interact with our dataset.')
diff --git a/README.md b/README.md
@@ -0,0 +1,2 @@
+# NYC-Parking-Ticket-Data-Visualizer
+A data visualizer that displays the following: where the most tickets are issued, the most common kinds of tickets, who writes the tickets, the proportion of out-of-state to in-state vehicles, and the type of car that gets ticketed.
diff --git a/data/NYC_Parking_Data.csv b/data/NYC_Parking_Data.csv
diff --git a/images/ange_circle.JPG b/images/ange_circle.JPG
diff --git a/images/conor_circle.JPG b/images/conor_circle.JPG
diff --git a/images/fahad_circle.JPG b/images/fahad_circle.JPG
diff --git a/images/nyc_map_image.jpg b/images/nyc_map_image.jpg
diff --git a/pages/About.py b/pages/About.py
@@ -0,0 +1,46 @@
+import pandas as pd 
+import numpy as np
+from matplotlib import pyplot as plt 
+import seaborn as sns
+import streamlit as st
+
+st.header("🚗 Meet Team Park It Like It's Hot! 🚗")
+
+
+col1, mid, col2 = st.columns([1,10,20])
+with col1:
+    st.image('images/ange_circle.jpg', width=250)
+with col2:
+    st.write("\n")
+    st.write("\n")
+    st.write("\n")
+    st.write("\n")
+    st.write('Ange Louis')
+    st.write('Senior at The City College of New York')
+    st.write('[LinkedIn](https://www.linkedin.com/in/ange-louis/) | [Github](https://github.com/angelouis)')
+
+
+col1, mid, col2 = st.columns([1,10,20])
+with col1:
+    st.image('images/conor_circle.jpg', width=250)
+with col2:
+    st.write("\n")
+    st.write("\n")
+    st.write("\n")
+    st.write("\n")
+    st.write('Conor Farrell')
+    st.write('Junior at College of Staten Island')
+    st.write('[LinkedIn](https://www.linkedin.com/in/conor7276/) | [Github](https://github.com/conor7276)')
+
+col1, mid, col2 = st.columns([1,10,20])
+with col1:
+    st.image('images/fahad_circle.jpg', width=250)
+with col2:
+    st.write("\n")
+    st.write("\n")
+    st.write("\n")
+    st.write("\n")
+    st.write('Fahad Tahir')
+    st.write('Senior at Hunter College')
+    st.write('[LinkedIn](https://www.linkedin.com/in/fahad-tahir-58612314a/) | [Github](https://github.com/fahadtahir02)')
+
diff --git a/pages/License.py b/pages/License.py
@@ -0,0 +1,51 @@
+import pandas as pd 
+import numpy as np
+from matplotlib import pyplot as plt 
+import seaborn as sns
+import streamlit as st
+
+df = pd.read_csv("https://raw.githubusercontent.com/fahadtahir02/NYC-Parking-Ticket-Data-Visualizer/main/data/NYC_Parking_Data.csv")
+
+st.header("🔍 Find your license plate! 🔍")
+
+license_values = np.insert(df['Plate ID'].unique(),0,values="All")
+
+input = st.text_input('Put in your license plate, try some examples such as (41768JM , GUD1201)')
+
+
+
+if input in license_values:
+    st.write("We have your parking ticket.\nGathering data on it:")
+
+    #drop unused columns
+    df = df.drop('Unnamed: 0.3', axis = 1)
+    df = df.drop('Unnamed: 0.2', axis = 1)
+    df = df.drop('Unnamed: 0.1', axis = 1)
+    df = df.drop('Unnamed: 0', axis = 1)
+    df = df.drop('level_0', axis = 1)
+    df = df.drop('index', axis = 1)
+    df = df.drop('In NYC', axis = 1)
+    df = df.drop('Feet From Curb',axis = 1)
+    df = df.drop('Violation Post Code',axis = 1)
+    df = df.drop('Violation Description',axis = 1)
+    df = df.drop('No Standing or Stopping Violation',axis = 1)
+    df = df.drop('Hydrant Violation',axis = 1)
+    df = df.drop('Double Parking Violation', axis = 1)
+    df = df.drop('IntTime', axis = 1)
+    df = df.drop('below 96th', axis = 1)
+    df = df.drop('Meter Number',axis = 1)
+
+    cond = (df['Plate ID'] == input)
+    plate_info = df[cond]
+
+    total = plate_info['Price of Ticket'].sum()
+    low = plate_info['Hour'].min()
+    high = plate_info['Hour'].max()
+
+    st.write("The total amount this plate owes is $", total, "\nTaking place from hours ", low, " to ", high)
+    pd.set_option('display.max_columns',99)
+    st.dataframe(plate_info)
+
+elif input not in license_values:
+    st.write("There is no record of a parkting ticket for that plate")
+
diff --git a/pages/Visualizations.py b/pages/Visualizations.py
@@ -0,0 +1,218 @@
+import pandas as pd 
+import numpy as np
+from matplotlib import pyplot as plt 
+import seaborn as sns
+import streamlit as st
+import plotly.express as px
+import plotly.graph_objects as go
+
+df = pd.read_csv("data/NYC_Parking_Data.csv")
+
+st.header("📈 Choose a visualization to view 📈")
+
+visuals = ["Hotspots", "Common Kinds of Tickets", "Time of Day", "Most Spotted Color", "Fines Over the Years"]
+
+option = st.selectbox(
+     'Choose a visualization below',
+    visuals)
+
+st.write('You selected:', option)
+
+if(option == "Time of Day"):
+    def load_data():
+        try:
+            df = pd.read_csv('https://raw.githubusercontent.com/fahadtahir02/NYC-Parking-Ticket-Data-Visualizer/main/data/NYC_Parking_Data.csv')
+            print("Loading github data")
+        except Exception as e:
+            df = pd.read_csv('data/NYC_Parking_Data_Updated(1).csv')
+            print('Loading local data')
+        return df
+
+    df = load_data()
+    @st.cache
+    def display_map(df, hour,year): #suppress_st_warning=True
+
+
+        condition = ((df['Hour'] == hour) & (df['Year'] == year))
+        new_df = df[condition]
+        #df = df[df['IntTime'] > 1200] testing with only showing specific times
+        fig = px.scatter_mapbox(new_df,
+                                lon = new_df['Longitude'],
+                                lat = new_df['Latitude'],                        
+                                zoom = 9,
+                                color = new_df['Price of Ticket'], # this option can change as long as it is of type int
+                                #color_continuous_scale = "bluyl", #bluyl
+                                center = {'lat' : 40.7,'lon' : -74},
+                                title = "Time of Day",
+                                hover_name = new_df['Full Address'],
+                                hover_data = [new_df['Time'],new_df['Issue Date'],new_df['Violation'],new_df['Price of Ticket']],
+                                height = 700,
+                                width = 700)
+        fig.update_layout(mapbox_style = "carto-positron") #other mapbox styles are available such as: 'open-street-map', 'white-bg',
+        # 'carto-positron', 'carto-darkmatter', 'stamen- terrain', 'stamen-toner', 'stamen-watercolor'
+        # Other mapbox styles are available but require an api key.
+        #fig.show()
+        #fig.update_traces(marker_line_width = 0)
+        return fig
+
+
+    #year = st.slider(label="Year",min_value=2013,max_value=2017,value=2015,step=1, help = "Select the year of the day to be displayed:"''',on_change = display_map(month)''')
+    #hour = st.slider(label= "Hour", min_value=0,max_value= 23, value = 12, step = 1, help = "Select the hour to be displayed:"''', on_change = display_map(year)''') 
+    year_options = [2013,2014,2015,2016,2017]
+    hour_options = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23]
+    year = st.select_slider(label="Year",options = year_options,value=2015,help = "Select the year of the day to be displayed:")
+    hour = st.select_slider(label= "Hour", options = hour_options, value = 12, help = "Select the hour to be displayed:") 
+    print("Creating map")
+
+    fig = display_map(df,hour,year)
+
+    print(type(fig))
+    st.plotly_chart(fig, use_container_width = True)
+
+    del fig
+
+elif(option == "Fines Over the Years"):
+    d = {'Years' : [2013,2014,2015,2016,2017], 'Total Earned' : [575170.0,291945.0,895710,243395.0,271130.0]}
+    df = pd.DataFrame(data = d)
+
+    fig = plt.figure(figsize=(10,4))
+    sns.barplot(data = df, x = 'Years', y = 'Total Earned')
+
+    print("Creating bar chart")
+
+    st.write("Total amount made per year off of tickets.")
+    st.pyplot(fig) 
+    for year in range(len(df['Years'])):
+        st.write(df['Years'].iloc[year], ": $", df['Total Earned'].iloc[year])
+    # can use a switch structure to choose what graph to display based on option
+elif(option == "Hotspots"):
+    def load_data():
+        try:
+            df = pd.read_csv('https://raw.githubusercontent.com/fahadtahir02/NYC-Parking-Ticket-Data-Visualizer/main/data/NYC_Parking_Data.csv')
+            print("Loading github data")
+        except Exception as e:
+            df = pd.read_csv('data/NYC_Parking_Data_Updated(1).csv')
+            print('Loading local data')
+        return df
+
+    df = load_data()
+    @st.cache
+    def display_map(df, hour,year): #suppress_st_warning=True
+
+
+        condition = ((df['Hour'] == hour) & (df['Year'] == year))
+        new_df = df[condition]
+        #df = df[df['IntTime'] > 1200] testing with only showing specific times
+        fig = px.density_mapbox(new_df,
+                                lon = new_df['Longitude'],
+                                lat = new_df['Latitude'],                        
+                                zoom = 9,
+                                #color = new_df['Price of Ticket'], # this option can change as long as it is of type int
+                                #color_continuous_scale = "bluyl", #bluyl
+                                center = {'lat' : 40.7,'lon' : -74},
+                                title = "Hotspots",
+                                hover_name = new_df['Full Address'],
+                                hover_data = [new_df['Time'],new_df['Issue Date'],new_df['Violation'],new_df['Price of Ticket']],
+                                opacity = 0.75,
+                                height = 700,
+                                width = 700)
+        fig.update_layout(mapbox_style = "carto-positron") #other mapbox styles are available such as: 'open-street-map', 'white-bg',
+        # 'carto-positron', 'carto-darkmatter', 'stamen- terrain', 'stamen-toner', 'stamen-watercolor'
+        # Other mapbox styles are available but require an api key.
+        #fig.show()
+        #fig.update_traces(marker_line_width = 0)
+        return fig
+
+
+    #year = st.slider(label="Year",min_value=2013,max_value=2017,value=2015,step=1, help = "Select the year of the day to be displayed:"''',on_change = display_map(month)''')
+    #hour = st.slider(label= "Hour", min_value=0,max_value= 23, value = 12, step = 1, help = "Select the hour to be displayed:"''', on_change = display_map(year)''') 
+    year_options = [2013,2014,2015,2016,2017]
+    hour_options = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23]
+    year = st.select_slider(label="Year",options = year_options,value=2015,help = "Select the year of the day to be displayed:")
+    hour = st.select_slider(label= "Hour", options = hour_options, value = 12, help = "Select the hour to be displayed:") 
+    print("Creating map")
+
+    fig = display_map(df,hour,year)
+
+    print(type(fig))
+    st.plotly_chart(fig, use_container_width = True)
+
+    del fig
+elif(option == "Fines Over the Years"):
+    d = {'Years' : [2013,2014,2015,2016,2017], 'Total Earned' : [575170.0,291945.0,895710,243395.0,271130.0]}
+    df = pd.DataFrame(data = d)
+
+    fig = plt.figure(figsize=(10,4))
+    sns.barplot(data = df, x = 'Years', y = 'Total Earned')
+
+    print("Creating bar chart")
+
+    st.write("Total amount made per year off of tickets.")
+    st.pyplot(fig) 
+    for year in range(len(df['Years'])):
+        st.write(df['Years'].iloc[year], ": $", df['Total Earned'].iloc[year])
+    # can use a switch structure to choose what graph to display based on option 
+elif(option == "Most Spotted Color"):
+    data = pd.read_csv('https://raw.githubusercontent.com/fahadtahir02/NYC-Parking-Ticket-Data-Visualizer/main/data/NYC_Parking_Data.csv')
+    ndata = data[["Violation Code", "Issue Date"]].copy()
+    ndata.transpose()
+    vCode = ndata["Violation Code"].unique()
+    frequency = data['Vehicle Color'].map(data['Vehicle Color'].value_counts())
+    df1 = pd.DataFrame(frequency)
+    df1.rename(columns={"Vehicle Color": "Frequency"}, inplace = "True")
+    cData = data[['Vehicle Color', 'Violation Code']].copy()
+    df_main = pd.concat([cData, df1], axis = 1)
+    df_main = df_main.sort_values('Frequency', ascending=False)
+    df_main = df_main.dropna()
+
+    color_ticket = {
+    "White" : 9,
+    "Silver" : 8,
+    "Grey" : 5,
+    "Black" : 4
+
+    }
+
+    # Data to plot
+    labels = []
+    sizes = []
+
+    for x, y in color_ticket.items():
+        labels.append(x)
+        sizes.append(y)
+
+    # Plot
+    fig = plt.figure(figsize=(13,8))
+    dig = plt.pie(sizes, labels=labels, autopct = '%1.0f%%')
+
+    #plt.axis('equal')
+    #plt.show()
+    st.write()
+    st.pyplot(fig)
+elif(option == "Common Kinds of Tickets"):
+
+    tag_1 = "Standing or parking a vehicle without showing a current New York registration sticker."
+    tag_2 = "Bus Stop: Standing or parking where standing is not allowed by sign, street marking or; traffic control device."
+    tag_3 = "Parking Meter -- Parking in excess of the allowed time"
+    tag_4 = "Standing or parking a vehicle without showing a current New York inspection sticker."
+    tag_5 = "Stopping, standing or parking closer than 15 feet of a fire hydrant"
+    tag_6 = "General No Parking: No parking where parking is not allowed by sign, street marking or traffic control device."
+    tag_7 = "Parking Meter -- Failing to show a receipt or tag in the windshield."
+    tag_8 = "General No Standing: Standing or parking where standing is not allowed by sign, street marking or; traffic control device."
+    tag_9 =  "Standing or parking on the roadway side of a vehicle stopped, standing or parked at the curb; in other words also known as double parking."
+    tag_10 = "Street Cleaning: No parking where parking is not allowed by sign, street marking or traffic control device."
+
+    df3 = df.groupby(['Violation Code']).size().reset_index(name ='Total Amount').sort_values(by='Total Amount')
+    df_top_10 = df3.tail(10)
+    list_int = df_top_10['Violation Code'].tolist()
+
+    fig = go.Figure(data=[go.Bar(x=list(map(str,list_int)), y=df_top_10["Total Amount"].tolist(), 
+            hovertext=[tag_1,tag_2,tag_3,tag_4,tag_5,tag_6,tag_7,tag_8,tag_9,tag_10])],layout=go.Layout(height=600, width=900))
+    # Customize aspect
+    fig.update_traces(marker_color='rgb(158,202,225)', marker_line_color='rgb(8,48,107)',
+                  marker_line_width=1.5, opacity=0.6)
+    fig.update_layout(title_text='Top 10 Most Common Kinds of Tickets',
+                        xaxis_title="Violation Code Number",
+                        yaxis_title="Total Amount")
+
+    st.plotly_chart(fig, use_container_width = True)
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,9 @@
+streamlit
+pandas
+numpy
+plotly
+plotly-express
+scikit-learn==0.23.2
+pycaret
+seaborn
+matplotlib
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# NYC-Parking-Ticket-Data-Visualizer
		A data visualizer that displays the following: where the most tickets are issued, the most common kinds of tickets, who writes the tickets, the proportion of out-of-state to in-state vehicles, and the type of car that gets ticketed.