#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Module Name: Polynomial Interpolation
Description: A brief demo of inspecting, handling, and imputing Curved Missing Data, i.e. 'NaN', with Polynomial Interpolation (a Spline Interpolation variant is shown as well).

Credit / Prepared by:
Sun CHUNG, SMIEEE
M.Sc., HKU
License: MIT License
"""
12
+
13
+ import pandas as pd
14
+ import numpy as np
15
+ import matplotlib .pyplot as plt
16
+
# Sample time series data with missing values.
# Twenty consecutive calendar days (both endpoints inclusive, daily frequency).
dates = pd.date_range(start='2020-01-01', end='2020-01-20')

# One sales figure per day; np.nan marks the days with no recorded value.
# The first and last entries are known, so every gap is interior and can be
# interpolated without extrapolating past the ends of the series.
sales = [
    200, np.nan, 210, np.nan, np.nan, 250, 260, np.nan, 280, 290,
    285, np.nan, 280, 270, np.nan, np.nan, 320, 325, np.nan, 330,
]
data = pd.DataFrame({'date': dates, 'sales': sales})

print("Original Data:")
print(data)
25
+
# Check for missing values.
# Count the NaN entries in the 'sales' column and express them as a share of
# all rows.
missing_data = data['sales'].isnull().sum()
total_data = len(data)
# Guard the division so an empty frame reports 0% instead of dividing by zero.
missing_percentage = (missing_data / total_data) * 100 if total_data else 0.0

# Per-column NaN counts, followed by the overall percentage for 'sales'.
print("\n Missing Values in Each Column:")
print(data.isnull().sum())
print(f"Percentage of Missing Data: {missing_percentage:.2f} %")
34
+
# Short day/month labels (e.g. "01 Jan") for the x-axis of the first plot.
data['date_formatted'] = data['date'].dt.strftime('%d %b')

# Plot the original data to observe its shape.
# NaN values are not drawn, so missing days appear as gaps in the line while
# the 'o' markers still show every known point.
plt.figure(figsize=(10, 5))
plt.plot(data['date_formatted'], data['sales'], label='Original Sales', marker='o')
plt.title('Original Sales Data with Missing Values')
plt.xlabel('Date')
plt.ylabel('Sales')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)  # tilt the labels so neighbouring dates do not overlap
plt.show()
47
+
# Polynomial Interpolation (Order 2)
# Fill the NaN entries of 'sales' with a second-order polynomial fit and store
# the result in a new column so the original series stays untouched.
# pandas delegates method='polynomial' to SciPy, so SciPy must be installed.
# The fit uses the numeric values of the index (the default 0..n-1 here),
# which matches the calendar spacing because the rows are consecutive days.
data['sales_poly'] = data['sales'].interpolate(method='polynomial', order=2)

# Show the original and imputed columns side by side for comparison.
print("\n Data After Polynomial Interpolation (Order 2):")
print(data[['date', 'sales', 'sales_poly']])
53
+
# Plot the original and interpolated data.
# The original series ('o') shows gaps at the NaN days; the interpolated
# series ('x') is drawn over it so the filled-in values stand out.
plt.figure(figsize=(10, 5))
plt.plot(data['date'], data['sales'], label='Original Sales', marker='o')
plt.plot(data['date'], data['sales_poly'], label='Polynomial Interpolation', marker='x')
plt.title('Sales Data with Polynomial Interpolation')
plt.xlabel('Date')
plt.ylabel('Sales')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)  # tilt the date labels so they do not overlap
plt.show()
65
+
# Spline Interpolation (Order 3)
# Fill the NaN entries of 'sales' with a cubic spline and store the result in
# a separate column, leaving both 'sales' and any earlier imputation intact.
# pandas delegates method='spline' to SciPy, so SciPy must be installed.
# NOTE(review): the SciPy spline used here applies a default smoothing factor,
# so the curve may not pass exactly through the known points; only the NaN
# positions are replaced. Confirm this is the intended behaviour.
data['sales_spline'] = data['sales'].interpolate(method='spline', order=3)

# Show the original and imputed columns side by side for comparison.
print("\n Data After Spline Interpolation (Order 3):")
print(data[['date', 'sales', 'sales_spline']])
71
+
# Plot the original and interpolated data.
# The original series ('o') shows gaps at the NaN days; the spline-imputed
# series ('x') is drawn over it so the filled-in values stand out.
plt.figure(figsize=(10, 5))
plt.plot(data['date'], data['sales'], label='Original Sales', marker='o')
plt.plot(data['date'], data['sales_spline'], label='Spline Interpolation', marker='x')
plt.title('Sales Data with Spline Interpolation')
plt.xlabel('Date')
plt.ylabel('Sales')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)  # tilt the date labels so they do not overlap
plt.show()