This repository has been archived by the owner on Jan 25, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot_activity.py
executable file
·218 lines (170 loc) · 6.48 KB
/
plot_activity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
#!/usr/bin/env python3
import datetime
import matplotlib.pyplot as plt
import numpy as np
import sys
#import scipy
################################################################################
def create_window(n):
'''
Create a sampling window.
Args:
n: odd positive integer
Returns:
array with maximum value in the middle and small elements towards the end
'''
# check if 'n' is odd
if n % 2 == 0 or n < 0:
raise ValueError('Only odd-length windows can be created.')
centre = (n - 1) / 2
window = [(n - np.abs(i - centre)) / n ** 2 for i in range(n)]
return window
################################################################################
def moving_correlation(big, small, centre):
'''
Calculate the correlation of a window sequence with a chunk of a big sequence.
Args:
big: main sequence
small: window sequence
centre: int indicating where to overlap 'small' with 'big'
'''
window_size = len(small)
# slice 'big' to obtain a sequence as long as 'small'
# where to slice it depends on 'centre'
# slice it in such a way that 'big[centre]' is the middle element of 'arr'
arr = big[centre - (window_size - 1) // 2 : centre + (window_size - 1) // 2 + 1]
# correlate them
return np.dot(arr, small)
################################################################################
if __name__ == '__main__':
with open('status', 'w') as status_file:
print('Last Run: {}'.format(sys.argv), file = status_file)
print('Arguments: {}'.format(len(sys.argv)), file = status_file)
# plot the training data
# set up the starting date and the activity numbers
current_date = datetime.date(2005, 1, 1)
activity = []
# read the file for activity numbers by date
with open('bright_patches.dat') as data_file:
for line in data_file:
# each line begins with a string containing the date
# read the line and find out what the date is
y = int(line[3 : 7])
m = int(line[7 : 9])
d = int(line[9 : 11])
# some dates are missing in 'data_file'
# so, keep incrementing 'current_date' until a mathc is found
while current_date != datetime.date(y, m, d):
current_date += datetime.timedelta(days = 1)
activity.append(0)
# some dates have an invalid number next to them
# for those, use 'np.nan'
try:
activity.append(int(line[19 :].strip()))
except ValueError:
activity.append(0)
current_date += datetime.timedelta(days = 1)
# create a window to correlate
window_size = 31
gaussian_window = create_window(window_size)
# to make correlation calculation easy, place 365 zeros in front of 'activity'
spots = np.concatenate([np.zeros(365), activity])
# find the shift from 1st January which gives the best correlation
best_correlation = 0
best_prediction = -100
# 'prediction' is the shift from 1st January
# it indicates the shift 'gaussian_window' must be given before correlating
# for each shift, calculate the weighted average of of the correlation with each major spike
# the best shift will have the greatest weighted average
for prediction in range(-100, 100):
correlation = np.average([moving_correlation(spots, gaussian_window, i + prediction) for i in [365, 1085, 1450, 1826]] , axis = None, weights= [0.1,0.2,0.3,0.4])
if correlation > best_correlation:
best_correlation = correlation
best_prediction = prediction
print('Predicted day for first spike '+str(best_prediction))
# predict the second spike
# the minor spikes seem to be linear
# the gaps betwen them are approximately constant
# hence, use linear polynomial interpolation
# first, locate all these minor spikes
someday = []
i = 100
while i < len(activity):
if 20 <= activity[i] <= 30:
someday.append(i)
i += 300
i += 1
# find the linear polynomial which best fits these points
coeffs = np.polyfit(range(5),someday,1)
# evaluate the polynomial at the next point
# that is the predicted location of the next spike
predicted = int(np.polyval(coeffs,5)-1826)
print('Predicted day for second spike '+str(predicted))
# open a window to plot the training data
plt.style.use('classic')
fig = plt.figure()
fig.canvas.set_window_title('Variation of Solar Activity')
ax = fig.add_subplot(1, 1, 1)
ax.axhline(linewidth = 1.6, color = 'k')
ax.axvline(linewidth = 1.6, color = 'k')
ax.plot(activity, 'r-', label = 'number of bright solar regions')
with open('status', 'a') as status_file:
print('training data: {}'.format(len(activity)), file = status_file)
# beautification
ax.grid(True, linewidth = 0.4)
ax.legend()
ax.set_xlabel('time')
ax.set_xlim(0, 1826)
ax.set_xticks([0, 365, 720, 1085, 1451])
# ax.set_xticklabels(['2005', '2006', '2007', '2008', '2009'])
ax.set_ylabel('solar activity')
ax.set_ylim(0, 80)
ax.set_title('Variation of Solar Activity, 2005 to 2009')
# plt.show()
########################################
# plot the testing data
# set up the starting date and the activity numbers
current_date = datetime.date(2010, 1, 1)
activity = []
# read the file for activity numbers by date
with open('training.dat') as data_file:
for line in data_file:
# each line begins with a string containing the date
# read the line and find out what the date is
y = int(line[3 : 7])
m = int(line[7 : 9])
d = int(line[9 : 11])
# some dates are missing in 'data_file'
# so, keep incrementing 'current_date' until a mathc is found
while current_date != datetime.date(y, m, d):
current_date += datetime.timedelta(days = 1)
activity.append(np.nan)
# some dates have an invalid number next to them
# for those, use 'np.nan'
try:
activity.append(int(line[19 :].strip()))
except ValueError:
activity.append(np.nan)
current_date += datetime.timedelta(days = 1)
# open a window to plot the testing data
plt.style.use('classic')
fig = plt.figure()
fig.canvas.set_window_title('Variation of Solar Activity')
ax = fig.add_subplot(1, 1, 1)
ax.axhline(linewidth = 1.6, color = 'k')
ax.axvline(linewidth = 1.6, color = 'k')
ax.plot(activity, 'r-', label = 'number of bright solar regions')
with open('status', 'a') as status_file:
print('testing data: {}'.format(len(activity)), file = status_file)
# beautification
ax.grid(True, linewidth = 0.4)
ax.legend()
ax.set_xlabel('time')
ax.set_xlim(0, 243)
ax.set_xticks([0, 31, 59, 90, 120, 151, 181, 212, 243])
# ax.set_xticklabels(['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August'])
ax.set_ylabel('solar activity')
ax.set_ylim(0, 80)
ax.set_title('Variation of Solar Activity, 2010')
plt.show()
# open a window to plot the testing data