Skip to content

Commit 13fbd99

Browse files
authored
python file uploaded for sharing
x2 Demo with sample data (random 'NaN' in every runtime): - Linear interpolation - Polynomial interpolation
1 parent e90edfc commit 13fbd99

File tree

2 files changed

+145
-0
lines changed

2 files changed

+145
-0
lines changed

interpolation.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Module Name: Linear Interpolation
5+
Description: A brief demo of inspecting, handling, and imputing missing data (i.e. 'NaN' values) with linear interpolation.
6+
7+
Credit / Prepared by:
8+
Sun CHUNG, SMIEEE
9+
M.Sc., HKU
10+
License: MIT License
11+
"""
12+
13+
import pandas as pd
14+
import numpy as np
15+
import matplotlib.pyplot as plt
16+
17+
# Sample time series data with missing values
18+
dates = pd.date_range(start='2020-01-01', end='2020-01-10')
19+
sales = [200, np.nan, 210, np.nan, np.nan, 250, 260, np.nan, 280, 290]
20+
data = pd.DataFrame({'date': dates, 'sales': sales})
21+
22+
print("Original Data:")
23+
print(data)  # explicit print: a bare expression shows nothing when run as a script
24+
25+
### Inspecting this example shows that 'sales' is the column with missing values ('date' is complete).
26+
27+
# Check for missing values
28+
missing_data = data['sales'].isnull().sum()
29+
total_data = len(data)
30+
missing_percentage = (missing_data / total_data) * 100
31+
32+
print("\nMissing Values in Each Column:")
33+
print(data.isnull().sum())
34+
print(f"Percentage of Missing Data: {missing_percentage:.2f}%")
35+
36+
# Plot the original data to observe its shape
37+
plt.figure(figsize=(10, 5))
38+
plt.plot(data['date'], data['sales'], label='Original Sales', marker='o')
39+
plt.title('Original Sales Data with Missing Values')
40+
plt.xlabel('Date')
41+
plt.ylabel('Sales')
42+
plt.legend()
43+
plt.grid(True)
44+
plt.xticks(rotation=45)
45+
plt.show()
46+
47+
# Apply linear interpolation
48+
data['sales_interpolated'] = data['sales'].interpolate(method='linear')
49+
50+
print("\nData After Linear Interpolation:")
51+
print(data)
52+
53+
# Plot the original and interpolated data
54+
plt.figure(figsize=(10, 5))
55+
plt.plot(data['date'], data['sales'], label='Original Sales', marker='o')
56+
plt.plot(data['date'], data['sales_interpolated'], label='Interpolated Sales', marker='x')
57+
plt.title('Sales Data with Linear Interpolation')
58+
plt.xlabel('Date')
59+
plt.ylabel('Sales')
60+
plt.legend()
61+
plt.grid(True)
62+
plt.xticks(rotation=45)
63+
plt.show()

polynomial.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Module Name: Polynomial Interpolation
5+
Description: A brief demo of inspecting, handling, and imputing missing data (i.e. 'NaN' values) in a curved (non-linear) series with polynomial and spline interpolation.
6+
7+
Credit / Prepared by:
8+
Sun CHUNG, SMIEEE
9+
M.Sc., HKU
10+
License: MIT License
11+
"""
12+
13+
import pandas as pd
14+
import numpy as np
15+
import matplotlib.pyplot as plt
16+
17+
# Sample time series data with missing values
18+
dates = pd.date_range(start='2020-01-01', end='2020-01-20')
19+
sales = [200, np.nan, 210, np.nan, np.nan, 250, 260, np.nan, 280, 290,
20+
285, np.nan, 280, 270, np.nan, np.nan, 320, 325, np.nan, 330]
21+
data = pd.DataFrame({'date': dates, 'sales': sales})
22+
23+
print("Original Data:")
24+
print(data)
25+
26+
# Check for missing values
27+
missing_data = data['sales'].isnull().sum()
28+
total_data = len(data)
29+
missing_percentage = (missing_data / total_data) * 100
30+
31+
print("\nMissing Values in Each Column:")
32+
print(data.isnull().sum())
33+
print(f"Percentage of Missing Data: {missing_percentage:.2f}%")
34+
35+
data['date_formatted'] = data['date'].dt.strftime('%d %b')
36+
37+
# Plot the original data to observe its shape
38+
plt.figure(figsize=(10, 5))
39+
plt.plot(data['date_formatted'], data['sales'], label='Original Sales', marker='o')
40+
plt.title('Original Sales Data with Missing Values')
41+
plt.xlabel('Date')
42+
plt.ylabel('Sales')
43+
plt.legend()
44+
plt.grid(True)
45+
plt.xticks(rotation=45)
46+
plt.show()
47+
48+
# Polynomial Interpolation (Order 2)
49+
data['sales_poly'] = data['sales'].interpolate(method='polynomial', order=2)
50+
51+
print("\nData After Polynomial Interpolation (Order 2):")
52+
print(data[['date', 'sales', 'sales_poly']])
53+
54+
# Plot the original and interpolated data
55+
plt.figure(figsize=(10, 5))
56+
plt.plot(data['date'], data['sales'], label='Original Sales', marker='o')
57+
plt.plot(data['date'], data['sales_poly'], label='Polynomial Interpolation', marker='x')
58+
plt.title('Sales Data with Polynomial Interpolation')
59+
plt.xlabel('Date')
60+
plt.ylabel('Sales')
61+
plt.legend()
62+
plt.grid(True)
63+
plt.xticks(rotation=45)
64+
plt.show()
65+
66+
# Spline Interpolation (Order 3)
67+
data['sales_spline'] = data['sales'].interpolate(method='spline', order=3)
68+
69+
print("\nData After Spline Interpolation (Order 3):")
70+
print(data[['date', 'sales', 'sales_spline']])
71+
72+
# Plot the original and interpolated data
73+
plt.figure(figsize=(10, 5))
74+
plt.plot(data['date'], data['sales'], label='Original Sales', marker='o')
75+
plt.plot(data['date'], data['sales_spline'], label='Spline Interpolation', marker='x')
76+
plt.title('Sales Data with Spline Interpolation')
77+
plt.xlabel('Date')
78+
plt.ylabel('Sales')
79+
plt.legend()
80+
plt.grid(True)
81+
plt.xticks(rotation=45)
82+
plt.show()

0 commit comments

Comments
 (0)