import itertools
import logging
import operator
from functools import reduce as functools_reduce
from json import dumps as json_dumps
from json import loads as json_loads
from math import ceil as math_ceil
from operator import itemgetter
from operator import mul as operator_mul
try:
from scipy.ndimage.filters import convolve1d as scipy_convolve1d
except ImportError:
pass
import cfdm
import cftime
import numpy
# from numpy import arctan2 as numpy_arctan2 TODO AT2
from numpy import arange as numpy_arange
from numpy import arccos as numpy_arccos
from numpy import arccosh as numpy_arccosh
from numpy import arcsin as numpy_arcsin
from numpy import arcsinh as numpy_arcsinh
from numpy import arctan as numpy_arctan
from numpy import arctanh as numpy_arctanh
from numpy import array as numpy_array
from numpy import asanyarray as numpy_asanyarray
from numpy import bool_ as numpy_bool_
from numpy import ceil as numpy_ceil
from numpy import cos as numpy_cos
from numpy import cosh as numpy_cosh
from numpy import cumsum as numpy_cumsum
from numpy import diff as numpy_diff
from numpy import digitize as numpy_digitize
from numpy import dtype as numpy_dtype
from numpy import empty as numpy_empty
from numpy import errstate as numpy_errstate
from numpy import exp as numpy_exp
from numpy import expand_dims as numpy_expand_dims
from numpy import finfo as numpy_finfo
from numpy import floating as numpy_floating
from numpy import floor as numpy_floor
from numpy import integer as numpy_integer
from numpy import isnan as numpy_isnan
from numpy import linspace as numpy_linspace
from numpy import log as numpy_log
from numpy import log2 as numpy_log2
from numpy import log10 as numpy_log10
from numpy import nan as numpy_nan
from numpy import nanpercentile as numpy_nanpercentile
from numpy import ndarray as numpy_ndarray
from numpy import ndenumerate as numpy_ndenumerate
from numpy import ndim as numpy_ndim
from numpy import ndindex as numpy_ndindex
from numpy import newaxis as numpy_newaxis
from numpy import ones as numpy_ones
from numpy import percentile as numpy_percentile
from numpy import prod as numpy_prod
from numpy import ravel_multi_index as numpy_ravel_multi_index
from numpy import reshape as numpy_reshape
from numpy import result_type as numpy_result_type
from numpy import rint as numpy_rint
from numpy import round as numpy_round
from numpy import seterr as numpy_seterr
from numpy import shape as numpy_shape
from numpy import sin as numpy_sin
from numpy import sinh as numpy_sinh
from numpy import size as numpy_size
from numpy import tan as numpy_tan
from numpy import tanh as numpy_tanh
from numpy import tile as numpy_tile
from numpy import trunc as numpy_trunc
from numpy import unique as numpy_unique
from numpy import unravel_index as numpy_unravel_index
from numpy import vectorize as numpy_vectorize
from numpy import where as numpy_where
from numpy import zeros as numpy_zeros
from numpy.ma import MaskedArray as numpy_ma_MaskedArray
from numpy.ma import array as numpy_ma_array
from numpy.ma import count as numpy_ma_count
from numpy.ma import empty as numpy_ma_empty
from numpy.ma import filled as numpy_ma_filled
from numpy.ma import is_masked as numpy_ma_is_masked
from numpy.ma import isMA as numpy_ma_isMA
from numpy.ma import masked as numpy_ma_masked
from numpy.ma import masked_all as numpy_ma_masked_all
from numpy.ma import masked_invalid as numpy_ma_masked_invalid
from numpy.ma import masked_where as numpy_ma_masked_where
from numpy.ma import nomask as numpy_ma_nomask
from numpy.ma import where as numpy_ma_where
from numpy.testing import suppress_warnings as numpy_testing_suppress_warnings
from .. import mpi_on # TODODASK : remove when move to dask is complete
from ..cfdatetime import dt as cf_dt
from ..cfdatetime import dt2rt, rt2dt, st2rt
from ..constants import masked as cf_masked
from ..decorators import (
_deprecated_kwarg_check,
_display_or_return,
_inplace_enabled,
_inplace_enabled_define_and_cleanup,
_manage_log_level_via_verbosity,
)
from ..functions import (
_DEPRECATION_ERROR_ATTRIBUTE,
_DEPRECATION_ERROR_METHOD,
_numpy_allclose,
_numpy_isclose,
_section,
abspath,
)
from ..functions import atol as cf_atol
from ..functions import broadcast_array
from ..functions import chunksize as cf_chunksize
from ..functions import collapse_parallel_mode, default_netCDF_fillvals
from ..functions import fm_threshold as cf_fm_threshold
from ..functions import free_memory, hash_array
from ..functions import inspect as cf_inspect
from ..functions import parse_indices, pathjoin
from ..functions import rtol as cf_rtol
from ..mixin_container import Container
from ..units import Units
from . import (
GatheredSubarray,
NetCDFArray,
RaggedContiguousSubarray,
RaggedIndexedContiguousSubarray,
RaggedIndexedSubarray,
UMArray,
)
from .collapse_functions import (
max_abs_f,
max_abs_ffinalise,
max_abs_fpartial,
max_f,
max_ffinalise,
max_fpartial,
mean_abs_f,
mean_abs_ffinalise,
mean_abs_fpartial,
mean_f,
mean_ffinalise,
mean_fpartial,
mid_range_f,
mid_range_ffinalise,
mid_range_fpartial,
min_abs_f,
min_abs_ffinalise,
min_abs_fpartial,
min_f,
min_ffinalise,
min_fpartial,
range_f,
range_ffinalise,
range_fpartial,
root_mean_square_f,
root_mean_square_ffinalise,
root_mean_square_fpartial,
sample_size_f,
sample_size_ffinalise,
sample_size_fpartial,
sd_f,
sd_ffinalise,
sd_fpartial,
sum_f,
sum_ffinalise,
sum_fpartial,
sum_of_squares_f,
sum_of_squares_ffinalise,
sum_of_squares_fpartial,
sw2_f,
sw2_ffinalise,
sw2_fpartial,
sw_f,
sw_ffinalise,
sw_fpartial,
var_f,
var_ffinalise,
var_fpartial,
)
from .filledarray import FilledArray
from .partition import Partition
from .partitionmatrix import PartitionMatrix
if mpi_on: # TODODASK : remove when move to dask is complete
from mpi4py.MPI import SUM as mpi_sum
from .. import mpi_comm, mpi_rank, mpi_size
logger = logging.getLogger(__name__)
# --------------------------------------------------------------------
# Constants
# --------------------------------------------------------------------
_year_length = 365.242198781
_month_length = _year_length / 12
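# Editorial note (not from the original source): 365.242198781 days is the
# UDUNITS definition of a year (the tropical year length), and a month is
# taken to be one twelfth of that.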
def _convert_to_builtin_type(x):
"""Convert a non-JSON-encodable object to a JSON-encodable built-in
type.
Possible conversions are:
================ ======= ================================
Input Output `numpy` data-types covered
================ ======= ================================
`numpy.bool_` `bool` bool
`numpy.integer` `int` int, int8, int16, int32, int64,
uint8, uint16, uint32, uint64
`numpy.floating` `float` float, float16, float32, float64
================ ======= ================================
:Parameters:
x:
`numpy.bool_` or `numpy.integer` or `numpy.floating`
An object of a numpy primitive data type.
:Returns:
`bool` or `int` or `float`
The object converted to a JSON-encodable type.
**Examples:**
>>> type(_convert_to_builtin_type(numpy.bool_(True)))
<class 'bool'>
>>> type(_convert_to_builtin_type(numpy.array([1.0])[0]))
<class 'float'>
>>> type(_convert_to_builtin_type(numpy.array([2])[0]))
<class 'int'>
"""
if isinstance(x, numpy_bool_):
return bool(x)
if isinstance(x, numpy_integer):
return int(x)
if isinstance(x, numpy_floating):
return float(x)
raise TypeError(
"{0!r} object is not JSON serializable: {1!r}".format(type(x), x)
)
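# A usage sketch (editorial, not a line quoted from elsewhere in this
# module): _convert_to_builtin_type is suitable as the ``default`` hook of
# json.dumps, so that numpy scalars inside a dumpd() dictionary can be
# serialized, e.g.
#
#     json_dumps(d.dumpd(), default=_convert_to_builtin_type)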
# --------------------------------------------------------------------
# _seterr = How floating-point errors in the results of arithmetic
# operations are handled. These defaults are those of
# numpy 1.10.1.
# --------------------------------------------------------------------
_seterr = {
"divide": "warn",
"invalid": "warn",
"over": "warn",
"under": "ignore",
}
# --------------------------------------------------------------------
# _seterr_raise_to_ignore = As _seterr but with any values of 'raise'
# changed to 'ignore'.
# --------------------------------------------------------------------
_seterr_raise_to_ignore = _seterr.copy()
for key, value in _seterr.items():
if value == "raise":
_seterr_raise_to_ignore[key] = "ignore"
# --- End: for
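# Both dictionaries are keyword arguments in the form accepted by
# numpy.seterr. A hedged sketch (not a line from this module) of how such a
# dictionary can be applied and then restored around arithmetic:
#
#     old = numpy_seterr(**_seterr_raise_to_ignore)
#     try:
#         ...  # arithmetic that must not raise FloatingPointError
#     finally:
#         numpy_seterr(**old)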
# --------------------------------------------------------------------
# _mask_fpe[0] = Whether or not to automatically set
# FloatingPointError exceptions to masked values in
# arithmetic.
# --------------------------------------------------------------------
_mask_fpe = [False]
_xxx = numpy_empty((), dtype=object)
_empty_set = set()
_units_None = Units()
_units_1 = Units("1")
_units_radians = Units("radians")
_dtype_object = numpy_dtype(object)
_dtype_float = numpy_dtype(float)
_dtype_bool = numpy_dtype(bool)
_cached_axes = {0: []}
def _initialise_axes(ndim):
"""Initialise dimension identifiers of N-d data.
:Parameters:
ndim: `int`
The number of dimensions in the data.
:Returns:
`list`
The dimension identifiers, one for each dimension in the
array. If the data is scalar then the list will be empty.
**Examples:**
>>> _initialise_axes(0)
[]
>>> _initialise_axes(1)
['dim0']
>>> _initialise_axes(3)
['dim0', 'dim1', 'dim2']
>>> _initialise_axes(3) is _initialise_axes(3)
True
"""
axes = _cached_axes.get(ndim, None)
if axes is None:
axes = ["dim%d" % i for i in range(ndim)]
_cached_axes[ndim] = axes
return axes
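# Note that, because the same cached list object is returned for a given
# ndim, callers must treat the result of _initialise_axes as read-only and
# copy it before modifying it.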
class Data(Container, cfdm.Data):
"""An N-dimensional data array with units and masked values.
* Contains an N-dimensional, indexable and broadcastable array with
many similarities to a `numpy` array.
* Contains the units of the array elements.
* Supports masked arrays, regardless of whether or not it was
initialised with a masked array.
* Stores and operates on data arrays which are larger than the
available memory.
**Indexing**
A data array is indexable in a similar way to a numpy array:
>>> d.shape
(12, 19, 73, 96)
>>> d[...].shape
(12, 19, 73, 96)
>>> d[slice(0, 9), 10:0:-2, :, :].shape
(9, 5, 73, 96)
There are three extensions to the numpy indexing functionality:
* Size 1 dimensions are never removed by indexing.
An integer index i takes the i-th element but does not reduce the
rank of the output array by one:
>>> d.shape
(12, 19, 73, 96)
>>> d[0, ...].shape
(1, 19, 73, 96)
>>> d[:, 3, slice(10, 0, -2), 95].shape
(12, 1, 5, 1)
Size 1 dimensions may be removed with the `squeeze` method.
* The indices for each axis work independently.
When more than one dimension's slice is a 1-d boolean sequence or
1-d sequence of integers, then these indices work independently
along each dimension (similar to the way vector subscripts work in
Fortran), rather than by their elements:
>>> d.shape
(12, 19, 73, 96)
>>> d[0, :, [0, 1], [0, 13, 27]].shape
(1, 19, 2, 3)
* Boolean indices may be any object which exposes the numpy array
interface.
>>> d.shape
(12, 19, 73, 96)
>>> d[..., d[0, 0, 0]>d[0, 0, 0].min()]
**Cyclic axes**
**Miscellaneous**
A `Data` object is picklable.
A `Data` object is hashable, but note that, since it is mutable, its
hash value is only valid whilst the data array is not changed in
place.
"""
def __init__(
self,
array=None,
units=None,
calendar=None,
fill_value=None,
hardmask=True,
chunk=True,
loadd=None,
loads=None,
dt=False,
source=None,
copy=True,
dtype=None,
mask=None,
_use_array=True,
):
"""**Initialization**
:Parameters:
array: optional
The array of values. May be any scalar or array-like
object, including another `Data` instance. Ignored if the
*source* parameter is set.
*Parameter example:*
``array=[34.6]``
*Parameter example:*
``array=[[1, 2], [3, 4]]``
*Parameter example:*
``array=numpy.ma.arange(10).reshape(2, 1, 5)``
units: `str` or `Units`, optional
The physical units of the data. If a `Units` object is
provided then this can also set the calendar. Ignored if
the *source* parameter is set.
The units (without the calendar) may also be set after
initialisation with the `set_units` method.
*Parameter example:*
``units='km hr-1'``
*Parameter example:*
``units='days since 2018-12-01'``
calendar: `str`, optional
The calendar for reference time units. Ignored if the
*source* parameter is set.
The calendar may also be set after initialisation with the
`set_calendar` method.
*Parameter example:*
``calendar='360_day'``
fill_value: optional
The fill value of the data. By default, or if set to
`None`, the `numpy` fill value appropriate to the array's
data-type will be used (see
`numpy.ma.default_fill_value`). Ignored if the *source*
parameter is set.
The fill value may also be set after initialisation with
the `set_fill_value` method.
*Parameter example:*
``fill_value=-999.``
dtype: data-type, optional
The desired data-type for the data. By default the
data-type will be inferred from the *array* parameter.
The data-type may also be set after initialisation with
the `dtype` attribute.
*Parameter example:*
``dtype=float``
*Parameter example:*
``dtype='float32'``
*Parameter example:*
``dtype=numpy.dtype('i2')``
.. versionadded:: 3.0.4
mask: optional
Apply this mask to the data given by the *array*
parameter. By default, or if *mask* is `None`, no mask is
applied. May be any scalar or array-like object (such as a
`list`, `numpy` array or `Data` instance) that is
broadcastable to the shape of *array*. Masking will be
carried out where the mask elements evaluate to `True`.
This mask will be applied in addition to any mask already
defined by the *array* parameter.
.. versionadded:: 3.0.5
source: optional
Initialize the array, units, calendar and fill value from
those of *source*.
hardmask: `bool`, optional
If False then the mask is soft. By default the mask is
hard.
dt: `bool`, optional
If True then strings (such as ``'1990-12-01 12:00'``)
given by the *array* parameter are re-interpreted as
date-time objects. By default they are not.
loadd: `dict`, optional
Initialise the data from a dictionary serialization of a
`cf.Data` object. All other arguments are ignored. See the
`dumpd` and `loadd` methods.
loads: `str`, optional
Initialise the data array from a string serialization of a
`Data` object. All other arguments are ignored. See the
`dumps` and `loads` methods.
copy: `bool`, optional
If False then do not deep copy input parameters prior to
initialization. By default arguments are deep copied.
chunk: `bool`, optional
If False then the data array will be stored in a single
partition. By default the data array will be partitioned
if it is larger than the chunk size, as returned by the
`cf.chunksize` function.
**Examples:**
>>> d = cf.Data(5)
>>> d = cf.Data([1,2,3], units='K')
>>> import numpy
>>> d = cf.Data(numpy.arange(10).reshape(2,5),
... units=Units('m/s'), fill_value=-999)
>>> d = cf.Data(tuple('fly'))
"""
data = array
super().__init__(source=source, fill_value=fill_value)
if source is not None:
partitions = self._custom.get("partitions")
if partitions is not None:
self.partitions = partitions.copy()
auxiliary_mask = self._custom.get("_auxiliary_mask")
if auxiliary_mask is not None:
self._auxiliary_mask = [mask.copy() for mask in auxiliary_mask]
return
if not (loadd or loads):
units = Units(units, calendar=calendar)
self._Units = units
empty_list = []
# The _flip attribute is an unordered subset of the data
# array's axis names. It is a subset of the axes given by the
# _axes attribute. It is used to determine whether or not to
# reverse an axis in each partition's sub-array during the
# creation of the partition's data array. DO NOT CHANGE IN
# PLACE.
self._flip(empty_list)
# The _all_axes attribute must be None or a tuple
self._all_axes = None
self.hardmask = hardmask
# The _HDF_chunks attribute is either None or a
# dictionary. DO NOT CHANGE IN PLACE.
self._HDF_chunks = None
# ------------------------------------------------------------
# Attribute: _auxiliary_mask
#
# Must be None or a (possibly empty) list of Data objects.
# ------------------------------------------------------------
self._auxiliary_mask = None
if loadd is not None:
self.loadd(loadd, chunk=chunk)
return
if loads is not None:
self.loads(loads, chunk=chunk)
return
# The _cyclic attribute contains the axes of the data array
# which are cyclic (and therefore allow cyclic slicing). It is
# a subset of the axes given by the _axes attribute. DO NOT
# CHANGE IN PLACE.
self._cyclic = _empty_set
data = array
if data is None:
if dtype is not None:
dtype = numpy_dtype(dtype)
self._dtype = dtype
return
# if not isinstance(data, Array):
if not self._is_abstract_Array_subclass(data):
check_free_memory = True
if isinstance(data, self.__class__):
# self.loadd(data.dumpd(), chunk=chunk)
self.__dict__ = data.copy().__dict__
if chunk:
self.chunk()
if mask is not None:
self.where(mask, cf_masked, inplace=True)
return
if not isinstance(data, numpy_ndarray):
data = numpy_asanyarray(data)
if (
data.dtype.kind == "O"
and not dt
and hasattr(data.item((0,) * data.ndim), "timetuple")
):
# We've been given one or more date-time objects
dt = True
else:
check_free_memory = False
_dtype = data.dtype
if dt or units.isreftime:
# TODO raise exception if compressed
kind = _dtype.kind
if kind in "US":
# Convert date-time strings to reference time floats
if not units:
YMD = str(data.item((0,) * data.ndim)).partition("T")[0]
units = Units("days since " + YMD, units._calendar)
self._Units = units
data = st2rt(data, units, units)
_dtype = data.dtype
elif kind == "O":
# Convert date-time objects to reference time floats
x = data.item(0)
x_since = "days since " + "-".join(
map(str, (x.year, x.month, x.day))
)
x_calendar = getattr(x, "calendar", "gregorian")
d_calendar = getattr(self.Units, "calendar", None)
d_units = getattr(self.Units, "units", None)
if x_calendar != "":
if d_calendar is not None:
if not self.Units.equivalent(
Units(x_since, x_calendar)
):
raise ValueError(
"Incompatible units: {!r}, {!r}".format(
self.Units, Units(x_since, x_calendar)
)
)
else:
d_calendar = x_calendar
# --- End: if
if not units:
# Set the units to something that is (hopefully)
# close to all of the datetimes, in an attempt to
# reduce errors arising from the conversion to
# reference times
units = Units(x_since, calendar=d_calendar)
else:
units = Units(d_units, calendar=d_calendar)
self._Units = units
# Check that all date-time objects have correct and
# equivalent calendars
calendars = set(
[getattr(x, "calendar", "gregorian") for x in data.flat]
)
if len(calendars) > 1:
raise ValueError(
"Not all date-time objects have equivalent "
"calendars: {}".format(tuple(calendars))
)
# If the date-times are calendar-agnostic, assign the
# given calendar, defaulting to Gregorian.
if calendars.pop() == "":
calendar = getattr(self.Units, "calendar", "gregorian")
new_data = numpy.empty(numpy_shape(data), dtype="O")
for i in numpy_ndindex(new_data.shape):
new_data[i] = cf_dt(data[i], calendar=calendar)
data = new_data
# Convert the date-time objects to reference times
data = dt2rt(data, None, units)
_dtype = data.dtype
if not units.isreftime:
raise ValueError(
"Can't initialise a reference time array with "
"units {!r}".format(units)
)
# --- End: if
shape = data.shape
ndim = data.ndim
size = data.size
axes = _initialise_axes(ndim)
# The _axes attribute is the ordered list of the data array's
# axis names. Each axis name is an arbitrary, unique
# string. DO NOT CHANGE IN PLACE.
self._axes = axes
self._ndim = ndim
self._shape = shape
self._size = size
if dtype is not None:
_dtype = numpy_dtype(dtype)
self._dtype = _dtype
self._set_partition_matrix(
data, chunk=chunk, check_free_memory=check_free_memory
)
# if isinstance(data, CompressedArray):
# self._set_CompressedArray(data,
# axes=axes)
# #if mask is not None:
# # self.where(mask, cf_masked, inplace=True)
##
# # r#eturn
# else:
# matrix = _xxx.copy()
#
# matrix[()] = Partition(location = [(0, n) for n in shape],
# shape = list(shape),
# axes = axes,
# flip = empty_list,
# Units = units,
# subarray = data,
# part = empty_list)
#
# self.partitions = PartitionMatrix(matrix, empty_list)
#
# if check_free_memory and free_memory() < cf_fm_threshold():
# self.to_disk()
#
# if chunk:
# self.chunk()
# # --- End: if
if mask is not None:
self.where(mask, cf_masked, inplace=True)
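    # A usage sketch (editorial, not part of the original source) of the
    # ``mask`` parameter handled at the end of __init__ above: passing
    #
    #     d = Data([1, 2, 3], mask=[False, True, False])
    #
    # is equivalent to creating the data without a mask and then calling
    # d.where([False, True, False], cf.masked, inplace=True).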
def _set_partition_matrix(self, array, chunk=True, check_free_memory=True):
"""Set the array.
:Parameters:
array: subclass of `Array`
The array to be inserted.
check_free_memory: `bool`, optional
If True then store the data array on disk if there is
insufficient available memory.
:Returns:
`None`
**Examples:**
>>> d._set_partition_matrix(array)
"""
# if isinstance(array, CompressedArray):
get_compression_type = getattr(array, "get_compression_type", None)
if get_compression_type is not None and get_compression_type():
# array is compressed
self._set_CompressedArray(
array, check_free_memory=check_free_memory
)
return
empty_list = []
shape = array.shape
matrix = _xxx.copy()
matrix[()] = Partition(
location=[(0, n) for n in shape],
shape=list(shape),
axes=self._axes,
flip=empty_list,
Units=self.Units,
subarray=array,
part=empty_list,
)
self.partitions = PartitionMatrix(matrix, empty_list)
if check_free_memory and free_memory() < cf_fm_threshold():
self.to_disk()
if chunk:
self.chunk()
source = self.source(None)
if source is not None and source.get_compression_type():
self._del_Array(None)
def _set_CompressedArray(
self, compressed_array, copy=None, check_free_memory=True
):
"""Create and insert a partition matrix for a compressed array.
.. versionadded:: 3.0.6
.. seealso:: `_set_Array`, `_set_partition_matrix`, `compress`
:Parameters:
compressed_array: subclass of `CompressedArray`
copy: optional
Ignored.
check_free_memory: `bool`, optional
If True then store the data array on disk if there is
insufficient available memory.
:Returns:
`None`
"""
if check_free_memory and free_memory() < cf_fm_threshold():
compressed_array.to_disk()
new = type(self).empty(
shape=compressed_array.shape, units=self.Units, chunk=False
)
new._axes = self._axes
source_data = compressed_array.source()
compression_type = compressed_array.get_compression_type()
if compression_type == "ragged contiguous":
# --------------------------------------------------------
# Ragged contiguous
# --------------------------------------------------------
new.chunk(total=[0], omit_axes=[1])
count = compressed_array.get_count().array
start = 0
for n, partition in zip(count, new.partitions.flat):
end = start + n
partition.subarray = RaggedContiguousSubarray(
array=source_data,
shape=partition.shape,
compression={
"instance_axis": 0,
"instance_index": 0,
"c_element_axis": 1,
"c_element_indices": slice(start, end),
},
)
partition.part = []
start += n
elif compression_type == "ragged indexed":
# --------------------------------------------------------
# Ragged indexed
# --------------------------------------------------------
new.chunk(total=[0], omit_axes=[1])
index = compressed_array.get_index().array
(instances, inverse) = numpy.unique(index, return_inverse=True)
for i, partition in zip(
numpy.unique(inverse), new.partitions.flat
):
partition.subarray = RaggedIndexedSubarray(
array=source_data,
shape=partition.shape,
compression={
"instance_axis": 0,
"instance_index": 0,
"i_element_axis": 1,
"i_element_indices": numpy_where(inverse == i)[0],
},
)
partition.part = []
elif compression_type == "ragged indexed contiguous":
# --------------------------------------------------------
# Ragged indexed contiguous
# --------------------------------------------------------
new.chunk(total=[0, 1], omit_axes=[2])
index = compressed_array.get_index().array
count = compressed_array.get_count().array
(instances, inverse) = numpy.unique(index, return_inverse=True)
new_partitions = new.partitions.matrix
shape = compressed_array.shape
for i in range(shape[0]):
# For all of the profiles in this instance, find the
# locations in the count array of the number of
# elements in the profile
xprofile_indices = numpy.where(index == i)[0]
# Find the number of profiles in this instance
n_profiles = xprofile_indices.size
# Loop over profiles in this instance
for j in range(shape[1]):
partition = new_partitions[i, j]
if j >= n_profiles:
# This partition is full of missing data
subarray = FilledArray(
shape=partition.shape,
size=partition.size,
ndim=partition.ndim,
dtype=compressed_array.dtype,
fill_value=cf_masked,
)
else:
# Find the location in the count array of the number
# of elements in this profile
profile_index = xprofile_indices[j]
if profile_index == 0:
start = 0
else:
start = int(count[:profile_index].sum())
stop = start + int(count[profile_index])
subarray = RaggedIndexedContiguousSubarray(
array=source_data,
shape=partition.shape,
compression={
"instance_axis": 0,
"instance_index": 0,
"i_element_axis": 1,
"i_element_index": 0,
"c_element_axis": 2,
"c_element_indices": slice(start, stop),
},
)
# --- End: if
partition.subarray = subarray
partition.part = []
# --- End: for
# --- End: for
elif compression_type == "gathered":
# --------------------------------------------------------
# Gathered
# --------------------------------------------------------
compressed_dimension = compressed_array.get_compressed_dimension()