14
14
# See the License for the specific language governing permissions and
15
15
# limitations under the License.
16
16
17
+ import numpy as np
17
18
import math
18
19
import numbers
19
20
@@ -259,7 +260,32 @@ def at(self, index):
259
260
@property
def indexes(self):
    """Get a sorted list of the filled bin indexes (the keys of ``bins``)."""
    # Iterating a dict yields its keys, so no explicit ``.keys()`` is needed.
    return sorted(self.bins)
264
+
265
@property
def binsMap(self):
    """Return the ``bins`` mapping object itself (no copy is made)."""
    mapping = self.bins
    return mapping
269
+
270
@property
def size(self):
    """Return how many entries ``bins`` currently holds."""
    filled = self.bins
    return len(filled)
274
+
275
@property
def keys(self):
    """Return the keys view of ``bins`` (as produced by ``bins.keys()``)."""
    bins = self.bins
    return bins.keys()
279
+
280
@property
def values(self):
    """Return a new list holding the values stored in ``bins``."""
    contents = self.bins.values()
    return list(contents)
284
+
285
@property
def keySet(self):
    """Return a new ``set`` containing the keys of ``bins``."""
    # A dict iterates over its keys, so ``set(d)`` equals ``set(d.keys())``.
    return set(self.bins)
263
289
264
290
def range (self , index ):
265
291
"""Get the low and high edge of a bin (given by index number)."""
@@ -432,48 +458,76 @@ def _c99StructName(self):
432
458
def _numpy(self, data, weights, shape):
    """Fill this sparse histogram from array input, creating bins on demand.

    The values to bin come from ``self.quantity(data)``. Entries whose value
    is NaN are forwarded to ``nanflow``. Every other entry with a positive
    weight is mapped to the integer index
    ``floor((value - origin) / binWidth)`` and forwarded to the
    sub-aggregator stored under that index in ``bins``; a missing
    sub-aggregator is created with ``self.value.zero()``.

    :param data: input handed to ``self.quantity`` and on to the sub-aggregators
    :param weights: scalar or numpy array of weights, normalized to an array
        by ``_makeNPWeights``
    :param shape: shape descriptor passed through to the check helpers and
        to the sub-aggregators
    """
    q = self.quantity(data)
    self._checkNPQuantity(q, shape)

    # Detect the unweighted case on the raw argument, before it is
    # normalized into an array below.
    all_weights_one = bool(
        (isinstance(weights, (float, int)) and weights == 1)
        or (isinstance(weights, np.ndarray) and np.all(weights == 1))
    )
    self._checkNPWeights(weights, shape)
    weights = self._makeNPWeights(weights, shape)
    newentries = weights.sum()

    # Keep weight only on the NaN entries and hand those to nanflow.
    not_nan = np.isnan(q)
    np.bitwise_not(not_nan, not_nan)  # invert in place: True where q is not NaN
    subweights = weights.copy()
    subweights[not_nan] = 0.0
    self.nanflow._numpy(data, subweights, shape)
    subweights[:] = weights  # restore the full weights for the bin filling below

    # switch to float here like in bin.py else numpy throws
    # TypeError on trivial integer cases such as:
    # >>> q = np.array([1,2,3,4])
    # >>> np.divide(q,1,q)
    # >>> np.floor(q,q)
    q = np.array(q, dtype=np.float64)
    neginfs = np.isneginf(q)
    posinfs = np.isposinf(q)

    # Convert values to integer bin indexes, in place.
    np.subtract(q, self.origin, q)
    np.divide(q, self.binWidth, q)
    np.floor(q, q)
    q = np.array(q, dtype=np.int64)
    q[neginfs] = LONG_MINUSINF
    q[posinfs] = LONG_PLUSINF

    # Only indexes that carry positive weight need a bin.
    selected = q[weights > 0.0]

    # used below. bit expensive, so do here once
    n_dim = self.n_dim

    if n_dim == 1 and all_weights_one and isinstance(self.value, Count):
        # special case: filling single array where all weights are 1
        # (use fast np.unique that returns counts)
        uniques, counts = np.unique(selected, return_counts=True)
        for c, index in zip(counts, uniques):
            if index != LONG_NAN:
                if self.bins.get(index) is None:
                    self.bins[index] = self.value.zero()
                # pass counts directly to Count object
                self.bins[index]._numpy(None, c, [None])
    else:
        # all other cases ...
        # Use the builtin ``bool`` as dtype: the ``np.bool`` alias is
        # missing from NumPy 1.24-1.26 and raises AttributeError there.
        selection = np.empty(q.shape, dtype=bool)
        for index in np.unique(selected):
            if index != LONG_NAN:
                if self.bins.get(index) is None:
                    self.bins[index] = self.value.zero()
                if n_dim == 1:
                    # passing on the full array is faster for one-dim histograms
                    np.not_equal(q, index, selection)
                    subweights[:] = weights
                    subweights[selection] = 0.0
                    self.bins[index]._numpy(data, subweights, shape)
                else:
                    # in practice passing on sliced arrays is faster for multi-dim histograms
                    # (subweights still equals weights here: only the
                    # one-dim branch above modifies it)
                    np.equal(q, index, selection)
                    self.bins[index]._numpy(data[selection], subweights[selection], [np.sum(selection)])

    # no possibility of exception from here on out (for rollback)
    self.entries += float(newentries)
@@ -615,12 +669,12 @@ def __hash__(self):
615
669
616
670
@property
def n_bins(self):
    """Get number of filled bins, consistent with SparselyBin and Categorize.

    This is the number of entries currently stored in ``bins``.
    """
    return len(self.bins)
620
674
621
675
def num_bins (self , low = None , high = None ):
622
676
"""
623
- Returns number of bins
677
+ Returns number of bins from low to high, including unfilled
624
678
625
679
Possible to set range with low and high params
626
680
@@ -629,7 +683,6 @@ def num_bins(self, low=None, high=None):
629
683
:returns: number of bins in range
630
684
:rtype: int
631
685
"""
632
- import numpy as np
633
686
# sparse hist not filled
634
687
if self .minBin is None or self .maxBin is None :
635
688
return 0
@@ -672,7 +725,6 @@ def bin_edges(self, low=None, high=None):
672
725
:returns: numpy array with bin edges for selected range
673
726
:rtype: numpy.array
674
727
"""
675
- import numpy as np
676
728
# sparse hist not filled
677
729
if self .minBin is None or self .maxBin is None :
678
730
return np .array ([self .origin , self .origin + 1 ])
@@ -715,7 +767,6 @@ def bin_entries(self, low=None, high=None, xvalues=[]):
715
767
:returns: numpy array with numbers of entries for selected bins
716
768
:rtype: numpy.array
717
769
"""
718
- import numpy as np
719
770
# sparse hist not filled
720
771
if self .minBin is None or self .maxBin is None :
721
772
return np .array ([])
@@ -757,10 +808,8 @@ def bin_centers(self, low=None, high=None):
757
808
:returns: numpy array with bin centers for selected range
758
809
:rtype: numpy.array
759
810
"""
760
- import numpy as np
761
811
bin_edges = self .bin_edges (low , high )
762
- centers = [(bin_edges [i ] + bin_edges [i + 1 ]) / 2. for i in range (len (bin_edges ) - 1 )]
763
- return np .array (centers )
812
+ return (bin_edges [:- 1 ] + bin_edges [1 :]) / 2
764
813
765
814
@property
766
815
def mpv (self ):
0 commit comments