# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import sys
import pytest
import weakref

import numpy as np
import pyarrow as pa


tensor_type_pairs = [
    ('i1', pa.int8()),
    ('i2', pa.int16()),
    ('i4', pa.int32()),
    ('i8', pa.int64()),
    ('u1', pa.uint8()),
    ('u2', pa.uint16()),
    ('u4', pa.uint32()),
    ('u8', pa.uint64()),
    ('f2', pa.float16()),
    ('f4', pa.float32()),
    ('f8', pa.float64())
]


def test_tensor_attrs():
    data = np.random.randn(10, 4)

    tensor = pa.Tensor.from_numpy(data)

    assert tensor.ndim == 2
    assert tensor.dim_names == []
    assert tensor.size == 40
    assert tensor.shape == data.shape
    assert tensor.strides == data.strides

    assert tensor.is_contiguous
    assert tensor.is_mutable

    # not writeable
    data2 = data.copy()
    data2.flags.writeable = False
    tensor = pa.Tensor.from_numpy(data2)
    assert not tensor.is_mutable

    # With dim_names
    tensor = pa.Tensor.from_numpy(data, dim_names=('x', 'y'))
    assert tensor.ndim == 2
    assert tensor.dim_names == ['x', 'y']
    assert tensor.dim_name(0) == 'x'
    assert tensor.dim_name(1) == 'y'

    wr = weakref.ref(tensor)
    assert wr() is not None
    del tensor
    assert wr() is None
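

# to_numpy() returns an ndarray that holds a reference to the backing Tensor,
# so the Tensor's refcount rises while the array is alive.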
def test_tensor_base_object():
    tensor = pa.Tensor.from_numpy(np.random.randn(10, 4))
    n = sys.getrefcount(tensor)
    array = tensor.to_numpy()  # noqa
    assert sys.getrefcount(tensor) == n + 1
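

# Round-trip every supported dtype through Tensor.from_numpy()/to_numpy()
# and check that the NumPy dtype maps to the expected Arrow type.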
@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
def test_tensor_numpy_roundtrip(dtype_str, arrow_type):
    dtype = np.dtype(dtype_str)
    data = (100 * np.random.randn(10, 4)).astype(dtype)

    tensor = pa.Tensor.from_numpy(data)
    assert tensor.type == arrow_type

    repr(tensor)  # should not raise

    result = tensor.to_numpy()
    assert (data == result).all()


def test_tensor_ipc_roundtrip(tmpdir):
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)

    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-roundtrip')
    mmap = pa.create_memory_map(path, 1024)

    pa.ipc.write_tensor(tensor, mmap)

    mmap.seek(0)
    result = pa.ipc.read_tensor(mmap)

    assert result.equals(tensor)
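

# Regression test for ARROW-5910: a tensor written through a gzip-compressed
# stream must read back identical through a matching compressed input stream.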
@pytest.mark.gzip
def test_tensor_ipc_read_from_compressed(tempdir):
    # ARROW-5910
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)

    path = tempdir / 'tensor-compressed-file'

    out_stream = pa.output_stream(path, compression='gzip')
    pa.ipc.write_tensor(tensor, out_stream)
    out_stream.close()

    result = pa.ipc.read_tensor(pa.input_stream(path, compression='gzip'))
    assert result.equals(tensor)


def test_tensor_ipc_strided(tmpdir):
    data1 = np.random.randn(10, 4)
    tensor1 = pa.Tensor.from_numpy(data1[::2])

    data2 = np.random.randn(10, 6, 4)
    tensor2 = pa.Tensor.from_numpy(data2[::, ::2, ::])

    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-strided')
    mmap = pa.create_memory_map(path, 2048)

    for tensor in [tensor1, tensor2]:
        mmap.seek(0)
        pa.ipc.write_tensor(tensor, mmap)

        mmap.seek(0)
        result = pa.ipc.read_tensor(mmap)

        assert result.equals(tensor)
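

# Tensor equality is value-based: a strided view and its contiguous copy
# compare equal, while tensors with differing values compare unequal.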
def test_tensor_equals():
    def eq(a, b):
        assert a.equals(b)
        assert a == b
        assert not (a != b)

    def ne(a, b):
        assert not a.equals(b)
        assert not (a == b)
        assert a != b

    data = np.random.randn(10, 6, 4)[::, ::2, ::]
    tensor1 = pa.Tensor.from_numpy(data)
    tensor2 = pa.Tensor.from_numpy(np.ascontiguousarray(data))
    eq(tensor1, tensor2)
    data = data.copy()
    data[9, 0, 0] = 1.0
    tensor2 = pa.Tensor.from_numpy(np.ascontiguousarray(data))
    ne(tensor1, tensor2)


def test_tensor_hashing():
    # Tensors are unhashable
    with pytest.raises(TypeError, match="unhashable"):
        hash(pa.Tensor.from_numpy(np.arange(10)))
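

# The serialized IPC size includes message metadata, so it exceeds the raw
# float64 payload of 8 bytes per element.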
def test_tensor_size():
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)
    assert pa.ipc.get_tensor_size(tensor) > (data.size * 8)


def test_read_tensor(tmpdir):
    # Create and write a tensor
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)
    data_size = pa.ipc.get_tensor_size(tensor)
    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-read-tensor')
    write_mmap = pa.create_memory_map(path, data_size)
    pa.ipc.write_tensor(tensor, write_mmap)
    # Read the tensor back
    read_mmap = pa.memory_map(path, mode='r')
    array = pa.ipc.read_tensor(read_mmap).to_numpy()
    np.testing.assert_equal(data, array)


def test_tensor_memoryview():
    # Tensors support the PEP 3118 buffer protocol
    for dtype, expected_format in [(np.int8, '=b'),
                                   (np.int64, '=q'),
                                   (np.uint64, '=Q'),
                                   (np.float16, 'e'),
                                   (np.float64, 'd'),
                                   ]:
        data = np.arange(10, dtype=dtype)
        dtype = data.dtype
        lst = data.tolist()
        tensor = pa.Tensor.from_numpy(data)
        m = memoryview(tensor)
        assert m.format == expected_format
        assert m.shape == data.shape
        assert m.strides == data.strides
        assert m.ndim == 1
        assert m.nbytes == data.nbytes
        assert m.itemsize == data.itemsize
        assert m.itemsize * 8 == tensor.type.bit_width
        assert np.frombuffer(m, dtype).tolist() == lst
        # The memoryview keeps the underlying buffer alive after the
        # tensor and source array are deleted.
        del tensor, data
        assert np.frombuffer(m, dtype).tolist() == lst