# Mirror of https://github.com/aykhans/AzSuicideDataVisualization.git
# (synced 2025-04-22 10:28:02 +00:00; 314 lines, 8.0 KiB, Python)
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import os
|
|
import pathlib
|
|
import subprocess
|
|
from tempfile import TemporaryDirectory
|
|
|
|
import pytest
|
|
import hypothesis as h
|
|
|
|
from pyarrow.util import find_free_port
|
|
from pyarrow import Codec
|
|
|
|
|
|
# Hypothesis profiles: 'ci' runs the most examples, 'dev' fewer, and 'debug'
# runs the fewest with verbose output.
h.settings.register_profile('ci', max_examples=1000)
h.settings.register_profile('dev', max_examples=50)
h.settings.register_profile(
    'debug',
    max_examples=10,
    verbosity=h.Verbosity.verbose,
)

# Activate the profile named by the HYPOTHESIS_PROFILE environment variable,
# falling back to 'dev'. The profile can also be chosen with pytest's
# --hypothesis-profile option; to see the generated examples, try:
#   pytest pyarrow -sv --enable-hypothesis --hypothesis-profile=debug
h.settings.load_profile(os.getenv('HYPOTHESIS_PROFILE', 'dev'))
|
|
|
|
# Point the AWS config file at /dev/null up front, before the AWS SDK is
# loaded, so that no user configuration values are read in.
os.environ['AWS_CONFIG_FILE'] = "/dev/null"
|
|
|
|
|
|
# Names of the selectable test groups. Each name is used as a pytest marker
# and gets a matching --enable-<name> / --disable-<name> command-line option.
groups = [
    'brotli', 'bz2', 'cython', 'dataset', 'hypothesis',
    'fastparquet', 'gandiva', 'gdb', 'gzip', 'hdfs',
    'large_memory', 'lz4', 'memory_leak', 'nopandas', 'orc',
    'pandas', 'parquet', 'parquet_encryption', 'plasma', 's3',
    'snappy', 'tensorflow', 'flight', 'slow', 'requires_testing_data',
    'zstd',
]
|
|
|
|
# Default enabled/disabled state for every test group. Entries that start as
# False may be switched on below when the corresponding optional module
# imports successfully.
defaults = {
    # Compression codecs: enabled when the codec is available.
    'brotli': Codec.is_available('brotli'),
    'bz2': Codec.is_available('bz2'),
    'gzip': Codec.is_available('gzip'),
    'lz4': Codec.is_available('lz4'),
    'snappy': Codec.is_available('snappy'),
    'zstd': Codec.is_available('zstd'),

    # Groups that are on by default.
    'gdb': True,
    'requires_testing_data': True,

    # Groups that are off by default.
    'cython': False,
    'dataset': False,
    'fastparquet': False,
    'flight': False,
    'gandiva': False,
    'hdfs': False,
    'hypothesis': False,
    'large_memory': False,
    'memory_leak': False,
    'nopandas': False,
    'orc': False,
    'pandas': False,
    'parquet': False,
    'parquet_encryption': False,
    'plasma': False,
    's3': False,
    'slow': False,
    'tensorflow': False,
}
|
|
|
|
# Probe each optional dependency: when the import succeeds, enable the
# matching test group by default; when it fails, leave the default untouched.

try:
    import cython  # noqa
except ImportError:
    pass
else:
    defaults['cython'] = True

try:
    import fastparquet  # noqa
except ImportError:
    pass
else:
    defaults['fastparquet'] = True

try:
    import pyarrow.gandiva  # noqa
except ImportError:
    pass
else:
    defaults['gandiva'] = True

try:
    import pyarrow.dataset  # noqa
except ImportError:
    pass
else:
    defaults['dataset'] = True

try:
    import pyarrow.orc  # noqa
except ImportError:
    pass
else:
    defaults['orc'] = True

# pandas is the one two-way switch: its absence enables the 'nopandas' group.
try:
    import pandas  # noqa
except ImportError:
    defaults['nopandas'] = True
else:
    defaults['pandas'] = True

try:
    import pyarrow.parquet  # noqa
except ImportError:
    pass
else:
    defaults['parquet'] = True

try:
    import pyarrow.parquet.encryption  # noqa
except ImportError:
    pass
else:
    defaults['parquet_encryption'] = True

try:
    import pyarrow.plasma  # noqa
except ImportError:
    pass
else:
    defaults['plasma'] = True

try:
    import tensorflow  # noqa
except ImportError:
    pass
else:
    defaults['tensorflow'] = True

try:
    import pyarrow.flight  # noqa
except ImportError:
    pass
else:
    defaults['flight'] = True

try:
    from pyarrow.fs import S3FileSystem  # noqa
except ImportError:
    pass
else:
    defaults['s3'] = True

try:
    from pyarrow.fs import HadoopFileSystem  # noqa
except ImportError:
    pass
else:
    defaults['hdfs'] = True
|
|
|
|
|
|
def pytest_addoption(parser):
    """Register an --enable-<group> and a --disable-<group> flag per group.

    The default of each --enable flag comes from the PYARROW_TEST_<GROUP>
    environment variable when it is set to a non-empty value, and from
    ``defaults[group]`` otherwise. The --disable flag always defaults to
    False.
    """
    truthy = {'1', 'true', 'on', 'yes', 'y'}
    falsy = {'0', 'false', 'off', 'no', 'n'}

    def bool_env(name, default=None):
        # Read the upper-cased variable `name` and interpret it as a boolean.
        # A missing or empty variable yields `default`; an unrecognised
        # value raises ValueError.
        env_name = name.upper()
        raw = os.environ.get(env_name)
        if not raw:
            return default

        lowered = raw.lower()
        if lowered in truthy:
            return True
        if lowered in falsy:
            return False
        raise ValueError('{}={} is not parsable as boolean'
                         .format(env_name, lowered))

    for group in groups:
        enabled_by_default = bool_env(
            'PYARROW_TEST_{}'.format(group), defaults[group]
        )
        parser.addoption(
            '--enable-{}'.format(group),
            action='store_true',
            default=enabled_by_default,
            help=('Enable the {} test group'.format(group)),
        )
        parser.addoption(
            '--disable-{}'.format(group),
            action='store_true',
            default=False,
            help=('Disable the {} test group'.format(group)),
        )
|
|
|
|
|
|
class PyArrowConfig:
    """Holds the enabled/disabled state of each test group."""

    def __init__(self):
        # Maps a group name to its enabled state; starts out empty.
        self.is_enabled = {}

    def apply_mark(self, mark):
        """Enforce `mark` if its name is a known test group; ignore others."""
        name = mark.name
        if name not in groups:
            return
        self.requires(name)

    def requires(self, group):
        """Skip the running test unless `group` is enabled.

        Raises KeyError when `group` has no entry in ``is_enabled``.
        """
        if self.is_enabled[group]:
            return
        pytest.skip('{} NOT enabled'.format(group))
|
|
|
|
|
|
def pytest_configure(config):
    """Attach a PyArrowConfig to `config` and fill it from the CLI options.

    Every group name is registered as a pytest marker. A group counts as
    enabled when its --enable flag is set and its --disable flag is not.
    """
    config.pyarrow = PyArrowConfig()

    for group in groups:
        config.addinivalue_line("markers", group)

        # The --disable flag wins over --enable (and over the default).
        enabled = (
            config.getoption('--enable-{}'.format(group))
            and not config.getoption('--disable-{}'.format(group))
        )
        config.pyarrow.is_enabled[group] = enabled
|
|
|
|
|
|
def pytest_runtest_setup(item):
    """Pass every marker on `item` to the PyArrow config for enforcement."""
    for marker in item.iter_markers():
        item.config.pyarrow.apply_mark(marker)
|
|
|
|
|
|
@pytest.fixture
def tempdir(tmpdir):
    """Return pytest's ``tmpdir`` as a ``pathlib.Path``."""
    local_path = tmpdir.strpath
    return pathlib.Path(local_path)
|
|
|
|
|
|
@pytest.fixture(scope='session')
def base_datadir():
    """Return the path of the ``data`` directory next to this file."""
    here = pathlib.Path(__file__).parent
    return here.joinpath('data')
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def disable_aws_metadata(monkeypatch):
    """Keep the AWS SDK from contacting the EC2 metadata server.

    Without this, tests that exercise the S3 filesystem incur a 5 second
    delay. Applied automatically to every test.
    """
    monkeypatch.setenv("AWS_EC2_METADATA_DISABLED", "true")
|
|
|
|
|
|
# TODO(kszucs): move the following fixtures to test_fs.py once the previous
|
|
# parquet dataset implementation and hdfs implementation are removed.
|
|
|
|
@pytest.fixture(scope='session')
def hdfs_connection():
    """Return ``(host, port, user)`` for the HDFS tests.

    Values come from ARROW_HDFS_TEST_HOST, ARROW_HDFS_TEST_PORT and
    ARROW_HDFS_TEST_USER, defaulting to ``'default'``, ``0`` and ``'hdfs'``.
    The port is converted to ``int``.
    """
    env = os.environ
    return (
        env.get('ARROW_HDFS_TEST_HOST', 'default'),
        int(env.get('ARROW_HDFS_TEST_PORT', 0)),
        env.get('ARROW_HDFS_TEST_USER', 'hdfs'),
    )
|
|
|
|
|
|
@pytest.fixture(scope='session')
def s3_connection():
    """Return ``(host, port, access_key, secret_key)`` for a local S3 server.

    The host is ``localhost``, the port comes from ``find_free_port()``, and
    the credentials are fixed test values.
    """
    host = 'localhost'
    port = find_free_port()
    access_key = 'arrow'
    secret_key = 'apachearrow'
    return host, port, access_key, secret_key
|
|
|
|
|
|
@pytest.fixture(scope='session')
def s3_server(s3_connection):
    """Run a ``minio`` server in a temporary directory for the test session.

    `s3_connection` is the ``(host, port, access_key, secret_key)`` tuple;
    the credentials are passed to the server through the MINIO_ACCESS_KEY
    and MINIO_SECRET_KEY environment variables.

    Yields a dict with the keys ``'connection'`` (the `s3_connection`
    tuple), ``'process'`` (the ``subprocess.Popen`` handle) and
    ``'tempdir'`` (the server's data directory path).

    If launching ``minio`` raises OSError, the requesting tests are skipped.
    """
    host, port, access_key, secret_key = s3_connection

    address = '{}:{}'.format(host, port)
    env = os.environ.copy()
    env.update({
        'MINIO_ACCESS_KEY': access_key,
        'MINIO_SECRET_KEY': secret_key
    })

    with TemporaryDirectory() as tempdir:
        args = ['minio', '--compat', 'server', '--quiet', '--address',
                address, tempdir]
        proc = None
        try:
            proc = subprocess.Popen(args, env=env)
        except OSError:
            pytest.skip('`minio` command cannot be located')
        else:
            yield {
                'connection': s3_connection,
                'process': proc,
                'tempdir': tempdir
            }
        finally:
            if proc is not None:
                proc.kill()
                # Reap the child so it does not linger as a zombie, and so
                # it has fully exited before TemporaryDirectory removes the
                # data directory it was using.
                proc.wait()
|