Skip to content

Commit 887b66d

Browse files
committed
added groups for zscore value holder
1 parent 662c5a6 commit 887b66d

File tree

5 files changed

+78
-31
lines changed

5 files changed

+78
-31
lines changed

PyFin/Analysis/CrossSectionValueHolders.pyx

+23-23
Original file line numberDiff line numberDiff line change
@@ -227,46 +227,46 @@ cdef class CSAverageAdjustedSecurityValueHolder(CrossSectionValueHolder):
227227

228228

229229
cdef class CSZScoreSecurityValueHolder(CrossSectionValueHolder):
230-
def __init__(self, innerValue):
231-
super(CSZScoreSecurityValueHolder, self).__init__(innerValue)
230+
def __init__(self, innerValue, groups=None):
231+
super(CSZScoreSecurityValueHolder, self).__init__(innerValue, groups)
232232

233-
@property
234-
def value(self):
233+
cdef _cal_impl(self):
234+
cdef SeriesValues raw_values = self._inner.value
235235

236-
cdef SeriesValues raw_values
236+
if self._group:
237+
self.cached = raw_values.zscore(self._group.value)
238+
else:
239+
self.cached = raw_values.zscore()
240+
self.updated = 1
237241

242+
@property
243+
def value(self):
238244
if self.updated:
239245
return self.cached
240246
else:
241-
raw_values = self._inner.value
242-
self.cached = raw_values.zscore()
243-
self.updated = 1
247+
self._cal_impl()
244248
return self.cached
245249

246-
@cython.cdivision(True)
247250
cpdef double value_by_name(self, name):
248-
249-
cdef SeriesValues raw_values
250-
251251
if self.updated:
252252
return self.cached[name]
253253
else:
254-
raw_values = self._inner.value
255-
self.cached = raw_values.zscore()
256-
self.updated = 1
254+
self._cal_impl()
257255
return self.cached[name]
258256

259-
@cython.cdivision(True)
260257
cpdef SeriesValues value_by_names(self, list names):
261-
262-
cdef SeriesValues raw_values
263-
264-
raw_values = self._inner.value_by_names(names)
265-
raw_values = raw_values.zscore()
266-
return raw_values[names]
258+
cdef SeriesValues raw_values = self._inner.value_by_names(names)
259+
if self._group:
260+
raw_values = raw_values.zscore(self._group.value_by_names(names))
261+
else:
262+
raw_values = raw_values.zscore()
263+
return raw_values
267264

268265
def __str__(self):
269-
return "\mathrm{{CSZScore}}({0})".format(str(self._inner))
266+
if self._group:
267+
return "\mathrm{{CSZscore}}({0}, groups={1})".format(str(self._inner), str(self._group))
268+
else:
269+
return "\mathrm{{CSZscore}}({0})".format(str(self._inner))
270270

271271

272272
cdef class CSResidueSecurityValueHolder(SecurityValueHolder):

PyFin/Analysis/SeriesValues.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ cdef class SeriesValues(object):
1616
cpdef SeriesValues mask(self, flags)
1717
cpdef list index(self)
1818
cpdef SeriesValues rank(self, SeriesValues groups=*)
19-
cpdef SeriesValues zscore(self)
19+
cpdef SeriesValues zscore(self, SeriesValues groups=*)
2020
cpdef SeriesValues unit(self)
2121

2222
cpdef SeriesValues mean(self, SeriesValues groups=*)

PyFin/Analysis/SeriesValues.pyx

+24-3
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,30 @@ cdef class SeriesValues(object):
234234
data[np.isnan(self.values)] = NAN
235235
return SeriesValues(data, self.name_mapping)
236236

237-
cpdef SeriesValues zscore(self):
238-
cdef np.ndarray[double, ndim=1] data = self.values
239-
return SeriesValues((data - nanmean(data)) / nanstd(data), self.name_mapping)
237+
cpdef SeriesValues zscore(self, SeriesValues groups=None):
238+
cdef np.ndarray[double, ndim=1] data
239+
cdef np.ndarray[long long, ndim=1] order
240+
cdef np.ndarray[long long, ndim=1] index_diff
241+
cdef long long diff_loc
242+
cdef long long start = 0
243+
cdef np.ndarray[long long, ndim=1] curr_idx
244+
cdef np.ndarray[double, ndim=1] values = self.values
245+
cdef np.ndarray[double, ndim=1] curr_values
246+
247+
if groups:
248+
data = values.copy()
249+
index_diff, order = groupby(groups.values)
250+
start = 0
251+
for diff_loc in index_diff:
252+
curr_idx = order[start:diff_loc + 1]
253+
curr_values = self.values[curr_idx]
254+
data[curr_idx] = (curr_values - nanmean(curr_values)) / nanstd(curr_values)
255+
start = diff_loc + 1
256+
data[np.isnan(values)] = NAN
257+
else:
258+
data = (values - nanmean(values)) / nanstd(values)
259+
data[np.isnan(values)] = NAN
260+
return SeriesValues(data, self.name_mapping)
240261

241262
cpdef SeriesValues unit(self):
242263
cdef np.ndarray[double, ndim=1] data = self.values

PyFin/api/Analysis.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,16 @@ def CSMean(dependency, groups=None):
5858
return CSAverageSecurityValueHolder(dependency, groups)
5959

6060

61-
def CSMeanAdjusted(dependency):
62-
return CSAverageAdjustedSecurityValueHolder(dependency)
61+
def CSMeanAdjusted(dependency, groups=None):
62+
return CSAverageAdjustedSecurityValueHolder(dependency, groups)
6363

6464

6565
def CSQuantiles(dependency, groups=None):
6666
return CSPercentileSecurityValueHolder(dependency, groups)
6767

6868

69-
def CSZScore(dependency):
70-
return CSZScoreSecurityValueHolder(dependency)
69+
def CSZScore(dependency, groups=None):
70+
return CSZScoreSecurityValueHolder(dependency, groups)
7171

7272

7373
def CSRes(left, right):

PyFin/tests/Analysis/testCrossSectionValueHolders.py

+26
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,32 @@ def testCSZscoreSecurityValueHolder(self):
234234

235235
np.testing.assert_array_almost_equal(expected, calculated.values)
236236

237+
def testCSZscoreSecurityValueHolderWithGroups(self):
238+
benchmark = SecurityLatestValueHolder(x='close')
239+
groups = SecurityLatestValueHolder(x='ind')
240+
meanAdjustedHolder = CSZScoreSecurityValueHolder(benchmark, groups)
241+
242+
for i in range(len(self.datas['aapl']['close'])):
243+
data = {'aapl': {Factors.CLOSE: self.datas['aapl'][Factors.CLOSE][i],
244+
Factors.OPEN: self.datas['aapl'][Factors.OPEN][i],
245+
'ind': 1.},
246+
'ibm': {Factors.CLOSE: self.datas['ibm'][Factors.CLOSE][i],
247+
Factors.OPEN: self.datas['ibm'][Factors.OPEN][i],
248+
'ind': 1.},
249+
'goog': {Factors.CLOSE: self.datas['goog'][Factors.CLOSE][i],
250+
Factors.OPEN: self.datas['goog'][Factors.OPEN][i],
251+
'ind': 2.},
252+
'baba': {Factors.CLOSE: self.datas['baba'][Factors.CLOSE][i],
253+
Factors.OPEN: self.datas['baba'][Factors.OPEN][i],
254+
'ind': 2.}}
255+
benchmark.push(data)
256+
meanAdjustedHolder.push(data)
257+
benchmarkValues = benchmark.value
258+
groups = {'aapl': 1., 'ibm': 1., 'goog': 2., 'baba': 2.}
259+
expected_rank = pd.Series(benchmarkValues.to_dict()).groupby(groups) \
260+
.transform(lambda x: (x - x.mean()) / x.std(ddof=0))
261+
np.testing.assert_array_almost_equal(expected_rank, meanAdjustedHolder.value.values)
262+
237263
def testCSZResidueSecurityValueHolder(self):
238264
y = SecurityLatestValueHolder(x='close')
239265
x = SecurityLatestValueHolder(x='open')

0 commit comments

Comments
 (0)