Skip to content

Commit

Permalink
aa
Browse files Browse the repository at this point in the history
  • Loading branch information
JMiltner97 committed Jan 10, 2024
1 parent 0eea3c6 commit 1f9a364
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 0 deletions.
Binary file added df_head_image.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
65 changes: 65 additions & 0 deletions project/analysis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import os
from sqlalchemy import create_engine, inspect
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm




# Directory part of this script's own path (__file__).
# NOTE(review): presumably used to resolve data/database paths relative to the
# script rather than the current working directory — confirm against the uses
# further down the file.
script_dir = os.path.dirname(__file__)
Expand Down Expand Up @@ -39,6 +46,8 @@ def count_events(year):
return ((df_bau['startDate'].dt.year <= year) & (df_bau['endDate'].dt.year >= year)).sum()

# For every year, count the df_bau events whose start/end span covers that
# year (see count_events) and store the count per row.
years_df['Event_Count'] = years_df['Year'].apply(count_events)
# "Baustellen" = construction sites; this is the column name the rest of the
# analysis uses for the per-year event count.
years_df = years_df.rename(columns={'Event_Count': 'Baustellen'})


# Left join on 'Year': every row of years_df is kept, and years that have no
# matching row in df_eis get NaN in 'Beförderte Güter'.
years_df = years_df.merge(df_eis[['Year', 'Beförderte Güter']], on='Year', how='left')
# Tag the merged column as the rail ("Eisenbahn") figure.
# NOTE(review): presumably this avoids a name clash with a road dataset that
# carries the same 'Beförderte Güter' column — confirm against the next merge.
years_df = years_df.rename(columns={'Beförderte Güter': 'Beförderte Güter Eisenbahn'})
Expand All @@ -49,3 +58,59 @@ def count_events(year):

# Persist the per-year table as SQL table "baustellen_per_year" through
# `engine`; an existing table of that name is replaced and the DataFrame index
# is not written as a column.
years_df.to_sql(name="baustellen_per_year", con=engine, if_exists="replace", index=False)



# Ratio of the year-over-year relative change in transported goods to the
# year-over-year relative change in the number of construction sites.
_baustellen_change = years_df['Baustellen'].pct_change()
_INF = float('inf')
_NAN = float('nan')

# When the number of construction sites does not change between two years the
# denominator is 0 and the ratio becomes +/-inf; store NaN instead so the
# column only ever contains finite, meaningful values.
years_df['Change in Eisenbahn per Event'] = (
    years_df['Beförderte Güter Eisenbahn'].pct_change() / _baustellen_change
).replace([_INF, -_INF], _NAN)
years_df['Change in Straße per Event'] = (
    years_df['Beförderte Güter Straße'].pct_change() / _baustellen_change
).replace([_INF, -_INF], _NAN)

# Filtering out the first row as percentage change is not defined for the first entry
years_df = years_df.iloc[1:]
# Keep only years before 2023.
# NOTE(review): presumably because 2023 data is incomplete — confirm.
years_df = years_df.loc[years_df['Year'] < 2023]

print(years_df.head(20))

# Plotting
plt.figure(figsize=(12, 6))
plt.plot(years_df['Year'], years_df['Beförderte Güter Eisenbahn'], label='Shipment on rails in kTons', marker='o')
plt.plot(years_df['Year'], years_df['Beförderte Güter Straße'], label='Shipment on road in kTons', marker='x')
plt.title('Goods Transported on rail and road in kTons')
plt.xlabel('Year')
plt.ylabel('Weight in kTons')
plt.legend()
plt.grid(True)
plt.show()


def _run_regression(df, target, feature='Baustellen'):
    """Regress ``target`` on ``feature`` and print the fit diagnostics.

    Two fits are produced:

    1. A scikit-learn ``LinearRegression`` trained on an 80/20 train/test
       split (fixed ``random_state=0`` for reproducibility); the test-set
       mean squared error, coefficients and intercept are printed.
    2. A statsmodels OLS with an added constant, fitted on all rows; its
       summary table is printed.

    Rows where ``feature`` or ``target`` is NaN are dropped first: the
    left merges that build the table can leave gaps, ``LinearRegression``
    raises on NaN input, and OLS would silently report NaN estimates.

    Args:
        df: DataFrame containing the ``feature`` and ``target`` columns.
        target: Name of the dependent-variable column.
        feature: Name of the single explanatory column.

    Returns:
        The fitted statsmodels OLS results object.
    """
    data = df[[feature, target]].dropna()
    X = data[[feature]]
    y = data[target]

    print(f"Regression of '{target}' on '{feature}'")

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    linear_model = LinearRegression()
    linear_model.fit(X_train, y_train)
    y_pred = linear_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error: {mse}")
    print(f"Coefficients: {linear_model.coef_}")
    print(f"Intercept: {linear_model.intercept_}")

    # statsmodels does not add an intercept on its own, hence add_constant.
    ols_results = sm.OLS(y, sm.add_constant(X)).fit()
    print(ols_results.summary())
    return ols_results


#regression analysis on trains
rail_results = _run_regression(years_df, 'Beförderte Güter Eisenbahn')

#regression analysis on streets
road_results = _run_regression(years_df, 'Beförderte Güter Straße')
3 changes: 3 additions & 0 deletions project/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@ six==1.16.0
SQLAlchemy==1.4.46
typing_extensions==4.5.0
geopandas
matplotlib
scikit-learn
statsmodels

0 comments on commit 1f9a364

Please sign in to comment.