first commit

2025-09-05 10:33:33 +00:00 · 2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions
--- a/.venv/Lib/site-packages/pandas/io/sas/__init__.py
+++ b/.venv/Lib/site-packages/pandas/io/sas/__init__.py
@@ -0,0 +1 @@
+from pandas.io.sas.sasreader import read_sas  # noqa:F401
--- a/.venv/Lib/site-packages/pandas/io/sas/_sas.cp310-win_amd64.pyd
+++ b/.venv/Lib/site-packages/pandas/io/sas/_sas.cp310-win_amd64.pyd
--- a/.venv/Lib/site-packages/pandas/io/sas/sas.pyx
+++ b/.venv/Lib/site-packages/pandas/io/sas/sas.pyx
@@ -0,0 +1,439 @@
+# cython: profile=False
+# cython: boundscheck=False, initializedcheck=False
+from cython import Py_ssize_t
+import numpy as np
+
+import pandas.io.sas.sas_constants as const
+
+ctypedef signed long long   int64_t
+ctypedef unsigned char      uint8_t
+ctypedef unsigned short     uint16_t
+
+# rle_decompress decompresses data using a Run Length Encoding
+# algorithm.  It is partially documented here:
+#
+# https://cran.r-project.org/package=sas7bdat/vignettes/sas7bdat.pdf
+#
+# Every command starts with one byte: its high nibble (control_byte)
+# selects the command and its low nibble (end_of_first_byte) feeds into
+# the run length.  Decoded bytes are written into a zero-filled buffer of
+# result_length bytes, which is returned as an array.
+#
+# Raises ValueError for an unknown control byte.
+cdef const uint8_t[:] rle_decompress(int result_length, const uint8_t[:] inbuff):
+
+    cdef:
+        uint8_t control_byte, x
+        uint8_t[:] result = np.zeros(result_length, np.uint8)
+        int rpos = 0
+        int i, nbytes, end_of_first_byte
+        Py_ssize_t ipos = 0, length = len(inbuff)
+
+    while ipos < length:
+        control_byte = inbuff[ipos] & 0xF0
+        end_of_first_byte = <int>(inbuff[ipos] & 0x0F)
+        ipos += 1
+
+        if control_byte == 0x00:
+            # Long literal copy.  The low nibble is the high part of the
+            # count, so a non-zero nibble is valid and must not be rejected.
+            nbytes = <int>(inbuff[ipos]) + 64 + end_of_first_byte * 256
+            ipos += 1
+            for _ in range(nbytes):
+                result[rpos] = inbuff[ipos]
+                rpos += 1
+                ipos += 1
+        elif control_byte == 0x40:
+            # not documented: long repeat of the single byte that follows
+            # the count byte.  The low nibble is the high part of the count.
+            nbytes = <int>(inbuff[ipos]) + 18 + end_of_first_byte * 256
+            ipos += 1
+            for _ in range(nbytes):
+                result[rpos] = inbuff[ipos]
+                rpos += 1
+            ipos += 1
+        elif control_byte == 0x60:
+            # long run of blanks (0x20)
+            nbytes = end_of_first_byte * 256 + <int>(inbuff[ipos]) + 17
+            ipos += 1
+            for _ in range(nbytes):
+                result[rpos] = 0x20
+                rpos += 1
+        elif control_byte == 0x70:
+            # long run of zero bytes
+            nbytes = end_of_first_byte * 256 + <int>(inbuff[ipos]) + 17
+            ipos += 1
+            for _ in range(nbytes):
+                result[rpos] = 0x00
+                rpos += 1
+        elif control_byte == 0x80:
+            # 0x80 .. 0xB0: literal copies of 1-16, 17-32, 33-48, 49-64 bytes
+            nbytes = end_of_first_byte + 1
+            for i in range(nbytes):
+                result[rpos] = inbuff[ipos + i]
+                rpos += 1
+            ipos += nbytes
+        elif control_byte == 0x90:
+            nbytes = end_of_first_byte + 17
+            for i in range(nbytes):
+                result[rpos] = inbuff[ipos + i]
+                rpos += 1
+            ipos += nbytes
+        elif control_byte == 0xA0:
+            nbytes = end_of_first_byte + 33
+            for i in range(nbytes):
+                result[rpos] = inbuff[ipos + i]
+                rpos += 1
+            ipos += nbytes
+        elif control_byte == 0xB0:
+            nbytes = end_of_first_byte + 49
+            for i in range(nbytes):
+                result[rpos] = inbuff[ipos + i]
+                rpos += 1
+            ipos += nbytes
+        elif control_byte == 0xC0:
+            # short repeat of the next input byte
+            nbytes = end_of_first_byte + 3
+            x = inbuff[ipos]
+            ipos += 1
+            for _ in range(nbytes):
+                result[rpos] = x
+                rpos += 1
+        elif control_byte == 0xD0:
+            # short run of 0x40 bytes
+            nbytes = end_of_first_byte + 2
+            for _ in range(nbytes):
+                result[rpos] = 0x40
+                rpos += 1
+        elif control_byte == 0xE0:
+            # short run of blanks (0x20)
+            nbytes = end_of_first_byte + 2
+            for _ in range(nbytes):
+                result[rpos] = 0x20
+                rpos += 1
+        elif control_byte == 0xF0:
+            # short run of zero bytes
+            nbytes = end_of_first_byte + 2
+            for _ in range(nbytes):
+                result[rpos] = 0x00
+                rpos += 1
+        else:
+            raise ValueError(f"unknown control byte: {control_byte}")
+
+    # In py37 cython/clang sees `len(outbuff)` as size_t and not Py_ssize_t
+    # NOTE(review): `result` is allocated with result_length elements, so this
+    # comparison cannot fail as written; it does not look at rpos.
+    if <Py_ssize_t>len(result) != <Py_ssize_t>result_length:
+        raise ValueError(f"RLE: {len(result)} != {result_length}")
+
+    return np.asarray(result)
+
+
+# rdc_decompress decompresses data using the Ross Data Compression algorithm:
+#
+# http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm
+#
+# The input is a stream of 16-bit control words, each followed by up to 16
+# items.  A clear control bit means "copy one literal byte"; a set bit means
+# the next byte holds a command in its high nibble and a count in its low
+# nibble.  Output goes into a zero-filled buffer of result_length bytes.
+#
+# Raises ValueError for an unknown command.
+cdef const uint8_t[:] rdc_decompress(int result_length, const uint8_t[:] inbuff):
+
+    cdef:
+        uint8_t cmd
+        uint16_t ctrl_bits = 0, ctrl_mask = 0, ofs, cnt
+        int rpos = 0, k
+        uint8_t[:] outbuff = np.zeros(result_length, dtype=np.uint8)
+        Py_ssize_t ipos = 0, length = len(inbuff)
+
+    while ipos < length:
+        # Move to the next control bit; load a fresh big-endian 16-bit
+        # control word once the current one is exhausted.
+        ctrl_mask = ctrl_mask >> 1
+        if ctrl_mask == 0:
+            ctrl_bits = ((<uint16_t>inbuff[ipos] << 8) +
+                         <uint16_t>inbuff[ipos + 1])
+            ipos += 2
+            ctrl_mask = 0x8000
+
+        # Control bit clear: plain literal byte.
+        if ctrl_bits & ctrl_mask == 0:
+            outbuff[rpos] = inbuff[ipos]
+            ipos += 1
+            rpos += 1
+            continue
+
+        cmd = (inbuff[ipos] >> 4) & 0x0F
+        cnt = <uint16_t>(inbuff[ipos] & 0x0F)
+        ipos += 1
+
+        # short RLE
+        if cmd == 0:
+            cnt += 3
+            for k in range(cnt):
+                outbuff[rpos + k] = inbuff[ipos]
+            rpos += cnt
+            ipos += 1
+
+        # long RLE
+        elif cmd == 1:
+            cnt += <uint16_t>inbuff[ipos] << 4
+            cnt += 19
+            ipos += 1
+            for k in range(cnt):
+                outbuff[rpos + k] = inbuff[ipos]
+            rpos += cnt
+            ipos += 1
+
+        # long pattern
+        elif cmd == 2:
+            ofs = cnt + 3
+            ofs += <uint16_t>inbuff[ipos] << 4
+            ipos += 1
+            cnt = <uint16_t>inbuff[ipos]
+            ipos += 1
+            cnt += 16
+            # copy cnt bytes starting ofs bytes back in the output
+            for k in range(cnt):
+                outbuff[rpos + k] = outbuff[rpos - <int>ofs + k]
+            rpos += cnt
+
+        # short pattern
+        elif (cmd >= 3) & (cmd <= 15):
+            ofs = cnt + 3
+            ofs += <uint16_t>inbuff[ipos] << 4
+            ipos += 1
+            # here the command value itself is the number of bytes to copy
+            for k in range(cmd):
+                outbuff[rpos + k] = outbuff[rpos - <int>ofs + k]
+            rpos += cmd
+
+        else:
+            raise ValueError("unknown RDC command")
+
+    # In py37 cython/clang sees `len(outbuff)` as size_t and not Py_ssize_t
+    if <Py_ssize_t>len(outbuff) != <Py_ssize_t>result_length:
+        raise ValueError(f"RDC: {len(outbuff)} != {result_length}\n")
+
+    return np.asarray(outbuff)
+
+
+# Integer codes for the two column kinds; Parser.__init__ stores one of
+# these per column and process_byte_array_with_data dispatches on them.
+cdef enum ColumnTypes:
+    column_type_decimal = 1
+    column_type_string = 2
+
+
+# type the page_data types
+# (module-level C int copies of the sas_constants values, taken once at
+# import time and compared against the page type in Parser.readline)
+cdef:
+    int page_meta_type = const.page_meta_type
+    int page_mix_types_0 = const.page_mix_types[0]
+    int page_mix_types_1 = const.page_mix_types[1]
+    int page_data_type = const.page_data_type
+    int subheader_pointers_offset = const.subheader_pointers_offset
+
+
+# Row reader that works on behalf of a Python-level parser object: it
+# copies layout information from that object once, then reads rows from the
+# parser's cached page into the parser's byte and string chunks.
+cdef class Parser:
+
+    cdef:
+        int column_count
+        int64_t[:] lengths
+        int64_t[:] offsets
+        int64_t[:] column_types
+        uint8_t[:, :] byte_chunk
+        object[:, :] string_chunk
+        char *cached_page
+        int current_row_on_page_index
+        int current_page_block_count
+        int current_page_data_subheader_pointers_len
+        int current_page_subheaders_count
+        int current_row_in_chunk_index
+        int current_row_in_file_index
+        int header_length
+        int row_length
+        int bit_offset
+        int subheader_pointer_length
+        int current_page_type
+        bint is_little_endian
+        const uint8_t[:] (*decompress)(int result_length, const uint8_t[:] inbuff)
+        object parser
+
+    # Capture the layout and current position of `parser`.
+    # Raises ValueError when a column type is neither b'd' nor b's'.
+    def __init__(self, object parser):
+        cdef:
+            int j
+            char[:] column_types
+
+        self.parser = parser
+        self.header_length = self.parser.header_length
+        self.column_count = parser.column_count
+        self.lengths = parser.column_data_lengths()
+        self.offsets = parser.column_data_offsets()
+        self.byte_chunk = parser._byte_chunk
+        self.string_chunk = parser._string_chunk
+        self.row_length = parser.row_length
+        self.bit_offset = self.parser._page_bit_offset
+        self.subheader_pointer_length = self.parser._subheader_pointer_length
+        self.is_little_endian = parser.byte_order == "<"
+        self.column_types = np.empty(self.column_count, dtype='int64')
+
+        # page indicators
+        self.update_next_page()
+
+        column_types = parser.column_types()
+
+        # map column types
+        for j in range(self.column_count):
+            if column_types[j] == b'd':
+                self.column_types[j] = column_type_decimal
+            elif column_types[j] == b's':
+                self.column_types[j] = column_type_string
+            else:
+                raise ValueError(f"unknown column type: {self.parser.columns[j].ctype}")
+
+        # compression
+        # (NULL means rows are used as stored, see process_byte_array_with_data)
+        if parser.compression == const.rle_compression:
+            self.decompress = rle_decompress
+        elif parser.compression == const.rdc_compression:
+            self.decompress = rdc_decompress
+        else:
+            self.decompress = NULL
+
+        # update to current state of the parser
+        # (update_next_page above reset current_row_on_page_index to 0;
+        # the parser's own value is restored here)
+        self.current_row_in_chunk_index = parser._current_row_in_chunk_index
+        self.current_row_in_file_index = parser._current_row_in_file_index
+        self.current_row_on_page_index = parser._current_row_on_page_index
+
+    # Read up to `nrows` rows, stopping early when readline reports that it
+    # is done, then write the three row counters back to the parser.
+    def read(self, int nrows):
+        cdef:
+            bint done
+            int i
+
+        for _ in range(nrows):
+            done = self.readline()
+            if done:
+                break
+
+        # update the parser
+        self.parser._current_row_on_page_index = self.current_row_on_page_index
+        self.parser._current_row_in_chunk_index = self.current_row_in_chunk_index
+        self.parser._current_row_in_file_index = self.current_row_in_file_index
+
+    # Ask the parser for its next page.  Returns the parser's `done` value;
+    # when it is true the cached page pointer is cleared, otherwise the
+    # per-page fields are refreshed.
+    cdef bint read_next_page(self):
+        cdef done
+
+        done = self.parser._read_next_page()
+        if done:
+            self.cached_page = NULL
+        else:
+            self.update_next_page()
+        return done
+
+    cdef update_next_page(self):
+        # update data for the current page
+
+        self.cached_page = <char *>self.parser._cached_page
+        self.current_row_on_page_index = 0
+        self.current_page_type = self.parser._current_page_type
+        self.current_page_block_count = self.parser._current_page_block_count
+        self.current_page_data_subheader_pointers_len = len(
+            self.parser._current_page_data_subheader_pointers
+        )
+        self.current_page_subheaders_count = self.parser._current_page_subheaders_count
+
+    # Read one data row.  Returns True when no further page is available
+    # and False otherwise; note that the mix and data page branches process
+    # a row before checking whether the next page has to be loaded.
+    # Raises ValueError for a page type that matches none of the branches.
+    cdef readline(self):
+
+        cdef:
+            int offset, bit_offset, align_correction
+            int subheader_pointer_length, mn
+            bint done, flag
+
+        bit_offset = self.bit_offset
+        subheader_pointer_length = self.subheader_pointer_length
+
+        # If there is no page, go to the end of the header and read a page.
+        if self.cached_page == NULL:
+            self.parser._path_or_buf.seek(self.header_length)
+            done = self.read_next_page()
+            if done:
+                return True
+
+        # Loop until a data row is read
+        while True:
+            if self.current_page_type == page_meta_type:
+                # Meta page: rows are located through the page's data
+                # subheader pointers; move on once all were consumed.
+                flag = self.current_row_on_page_index >=\
+                    self.current_page_data_subheader_pointers_len
+                if flag:
+                    done = self.read_next_page()
+                    if done:
+                        return True
+                    continue
+                current_subheader_pointer = (
+                    self.parser._current_page_data_subheader_pointers[
+                        self.current_row_on_page_index])
+                self.process_byte_array_with_data(
+                    current_subheader_pointer.offset,
+                    current_subheader_pointer.length)
+                return False
+            elif (self.current_page_type == page_mix_types_0 or
+                    self.current_page_type == page_mix_types_1):
+                # Mix page: fixed-length rows follow the subheader pointer
+                # table, shifted by align_correction (the table end modulo 8).
+                align_correction = (
+                    bit_offset
+                    + subheader_pointers_offset
+                    + self.current_page_subheaders_count * subheader_pointer_length
+                )
+                align_correction = align_correction % 8
+                offset = bit_offset + align_correction
+                offset += subheader_pointers_offset
+                offset += self.current_page_subheaders_count * subheader_pointer_length
+                offset += self.current_row_on_page_index * self.row_length
+                self.process_byte_array_with_data(offset, self.row_length)
+                mn = min(self.parser.row_count, self.parser._mix_page_row_count)
+                if self.current_row_on_page_index == mn:
+                    done = self.read_next_page()
+                    if done:
+                        return True
+                return False
+            elif self.current_page_type & page_data_type == page_data_type:
+                # Data page: fixed-length rows start right after the page
+                # header fields.
+                self.process_byte_array_with_data(
+                    bit_offset
+                    + subheader_pointers_offset
+                    + self.current_row_on_page_index * self.row_length,
+                    self.row_length,
+                )
+                flag = self.current_row_on_page_index == self.current_page_block_count
+                if flag:
+                    done = self.read_next_page()
+                    if done:
+                        return True
+                return False
+            else:
+                raise ValueError(f"unknown page type: {self.current_page_type}")
+
+    # Copy the row stored at cached_page[offset:offset + length] into the
+    # output chunks and advance the three row counters by one.
+    cdef void process_byte_array_with_data(self, int offset, int length):
+
+        cdef:
+            Py_ssize_t j
+            int s, k, m, jb, js, current_row
+            int64_t lngt, start, ct
+            const uint8_t[:] source
+            int64_t[:] column_types
+            int64_t[:] lengths
+            int64_t[:] offsets
+            uint8_t[:, :] byte_chunk
+            object[:, :] string_chunk
+
+        source = np.frombuffer(
+            self.cached_page[offset:offset + length], dtype=np.uint8)
+
+        # Only rows shorter than row_length are passed to the decompressor.
+        if self.decompress != NULL and (length < self.row_length):
+            source = self.decompress(self.row_length, source)
+
+        current_row = self.current_row_in_chunk_index
+        column_types = self.column_types
+        lengths = self.lengths
+        offsets = self.offsets
+        byte_chunk = self.byte_chunk
+        string_chunk = self.string_chunk
+        # Each row owns an 8-byte slot per decimal column in byte_chunk.
+        s = 8 * self.current_row_in_chunk_index
+        # js / jb index the next string / decimal column row of the chunks.
+        js = 0
+        jb = 0
+        for j in range(self.column_count):
+            lngt = lengths[j]
+            if lngt == 0:
+                break
+            start = offsets[j]
+            ct = column_types[j]
+            if ct == column_type_decimal:
+                # decimal
+                # Values shorter than 8 bytes go to the end of the slot for
+                # little-endian files and to its start otherwise.
+                if self.is_little_endian:
+                    m = s + 8 - lngt
+                else:
+                    m = s
+                for k in range(lngt):
+                    byte_chunk[jb, m + k] = source[start + k]
+                jb += 1
+            elif column_types[j] == column_type_string:
+                # string
+                # stored as bytes with trailing NUL bytes and blanks removed
+                string_chunk[js, current_row] = np.array(source[start:(
+                    start + lngt)]).tobytes().rstrip(b"\x00 ")
+                js += 1
+
+        self.current_row_on_page_index += 1
+        self.current_row_in_chunk_index += 1
+        self.current_row_in_file_index += 1
--- a/.venv/Lib/site-packages/pandas/io/sas/sas7bdat.py
+++ b/.venv/Lib/site-packages/pandas/io/sas/sas7bdat.py
@@ -0,0 +1,824 @@
+"""
+Read SAS7BDAT files
+
+Based on code written by Jared Hobbs:
+  https://bitbucket.org/jaredhobbs/sas7bdat
+
+See also:
+  https://github.com/BioStatMatt/sas7bdat
+
+Partial documentation of the file format:
+  https://cran.r-project.org/package=sas7bdat/vignettes/sas7bdat.pdf
+
+Reference for binary data compression:
+  http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm
+"""
+from __future__ import annotations
+
+from collections import abc
+from datetime import (
+    datetime,
+    timedelta,
+)
+import struct
+from typing import cast
+
+import numpy as np
+
+from pandas._typing import (
+    FilePath,
+    ReadBuffer,
+)
+from pandas.errors import (
+    EmptyDataError,
+    OutOfBoundsDatetime,
+)
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    isna,
+)
+
+from pandas.io.common import get_handle
+from pandas.io.sas._sas import Parser
+import pandas.io.sas.sas_constants as const
+from pandas.io.sas.sasreader import ReaderBase
+
+
+def _parse_datetime(sas_datetime: float, unit: str):
+    if isna(sas_datetime):
+        return pd.NaT
+
+    if unit == "s":
+        return datetime(1960, 1, 1) + timedelta(seconds=sas_datetime)
+
+    elif unit == "d":
+        return datetime(1960, 1, 1) + timedelta(days=sas_datetime)
+
+    else:
+        raise ValueError("unit must be 'd' or 's'")
+
+
+def _convert_datetimes(sas_datetimes: pd.Series, unit: str) -> pd.Series:
+    """
+    Convert to Timestamp if possible, otherwise to datetime.datetime.
+    SAS float64 lacks precision for more than ms resolution so the fit
+    to datetime.datetime is ok.
+
+    Parameters
+    ----------
+    sas_datetimes : {Series, Sequence[float]}
+       Dates or datetimes in SAS
+    unit : {str}
+       "d" if the floats represent dates, "s" for datetimes
+
+    Returns
+    -------
+    Series
+       Series of datetime64 dtype or datetime.datetime.
+    """
+    try:
+        return pd.to_datetime(sas_datetimes, unit=unit, origin="1960-01-01")
+    except OutOfBoundsDatetime:
+        s_series = sas_datetimes.apply(_parse_datetime, unit=unit)
+        s_series = cast(pd.Series, s_series)
+        return s_series
+
+
+class _SubheaderPointer:
+    offset: int
+    length: int
+    compression: int
+    ptype: int
+
+    def __init__(self, offset: int, length: int, compression: int, ptype: int):
+        self.offset = offset
+        self.length = length
+        self.compression = compression
+        self.ptype = ptype
+
+
+class _Column:
+    col_id: int
+    name: str | bytes
+    label: str | bytes
+    format: str | bytes
+    ctype: bytes
+    length: int
+
+    def __init__(
+        self,
+        col_id: int,
+        # These can be bytes when convert_header_text is False
+        name: str | bytes,
+        label: str | bytes,
+        format: str | bytes,
+        ctype: bytes,
+        length: int,
+    ):
+        self.col_id = col_id
+        self.name = name
+        self.label = label
+        self.format = format
+        self.ctype = ctype
+        self.length = length
+
+
+# SAS7BDAT represents a SAS data file in SAS7BDAT format.
+class SAS7BDATReader(ReaderBase, abc.Iterator):
+    """
+    Read SAS files in SAS7BDAT format.
+
+    Parameters
+    ----------
+    path_or_buf : path name or buffer
+        Name of SAS file or file-like object pointing to SAS file
+        contents.
+    index : column identifier, defaults to None
+        Column to use as index.
+    convert_dates : bool, defaults to True
+        Attempt to convert dates to Pandas datetime values.  Note that
+        some rarely used SAS date formats may be unsupported.
+    blank_missing : bool, defaults to True
+        Convert empty strings to missing values (SAS uses blanks to
+        indicate missing character variables).
+    chunksize : int, defaults to None
+        Return SAS7BDATReader object for iterations, returns chunks
+        with given number of lines.
+    encoding : string, defaults to None
+        String encoding.
+    convert_text : bool, defaults to True
+        If False, text variables are left as raw bytes.
+    convert_header_text : bool, defaults to True
+        If False, header text, including column names, are left as raw
+        bytes.
+    """
+
+    _int_length: int
+    _cached_page: bytes | None
+
+    def __init__(
+        self,
+        path_or_buf: FilePath | ReadBuffer[bytes],
+        index=None,
+        convert_dates=True,
+        blank_missing=True,
+        chunksize=None,
+        encoding=None,
+        convert_text=True,
+        convert_header_text=True,
+    ):
+
+        self.index = index
+        self.convert_dates = convert_dates
+        self.blank_missing = blank_missing
+        self.chunksize = chunksize
+        self.encoding = encoding
+        self.convert_text = convert_text
+        self.convert_header_text = convert_header_text
+
+        self.default_encoding = "latin-1"
+        self.compression = b""
+        self.column_names_strings: list[str] = []
+        self.column_names: list[str] = []
+        self.column_formats: list[str] = []
+        self.columns: list[_Column] = []
+
+        self._current_page_data_subheader_pointers: list[_SubheaderPointer] = []
+        self._cached_page = None
+        self._column_data_lengths: list[int] = []
+        self._column_data_offsets: list[int] = []
+        self._column_types: list[bytes] = []
+
+        self._current_row_in_file_index = 0
+        self._current_row_on_page_index = 0
+        self._current_row_in_file_index = 0
+
+        self.handles = get_handle(path_or_buf, "rb", is_text=False)
+
+        self._path_or_buf = self.handles.handle
+
+        try:
+            self._get_properties()
+            self._parse_metadata()
+        except Exception:
+            self.close()
+            raise
+
+    def column_data_lengths(self) -> np.ndarray:
+        """Return a numpy int64 array of the column data lengths"""
+        return np.asarray(self._column_data_lengths, dtype=np.int64)
+
+    def column_data_offsets(self) -> np.ndarray:
+        """Return a numpy int64 array of the column offsets"""
+        return np.asarray(self._column_data_offsets, dtype=np.int64)
+
+    def column_types(self) -> np.ndarray:
+        """
+        Returns a numpy character array of the column types:
+           s (string) or d (double)
+        """
+        return np.asarray(self._column_types, dtype=np.dtype("S1"))
+
+    def close(self) -> None:
+        self.handles.close()
+
+    def _get_properties(self) -> None:
+
+        # Check magic number
+        self._path_or_buf.seek(0)
+        self._cached_page = self._path_or_buf.read(288)
+        if self._cached_page[0 : len(const.magic)] != const.magic:
+            raise ValueError("magic number mismatch (not a SAS file?)")
+
+        # Get alignment information
+        align1, align2 = 0, 0
+        buf = self._read_bytes(const.align_1_offset, const.align_1_length)
+        if buf == const.u64_byte_checker_value:
+            align2 = const.align_2_value
+            self.U64 = True
+            self._int_length = 8
+            self._page_bit_offset = const.page_bit_offset_x64
+            self._subheader_pointer_length = const.subheader_pointer_length_x64
+        else:
+            self.U64 = False
+            self._page_bit_offset = const.page_bit_offset_x86
+            self._subheader_pointer_length = const.subheader_pointer_length_x86
+            self._int_length = 4
+        buf = self._read_bytes(const.align_2_offset, const.align_2_length)
+        if buf == const.align_1_checker_value:
+            align1 = const.align_2_value
+        total_align = align1 + align2
+
+        # Get endianness information
+        buf = self._read_bytes(const.endianness_offset, const.endianness_length)
+        if buf == b"\x01":
+            self.byte_order = "<"
+        else:
+            self.byte_order = ">"
+
+        # Get encoding information
+        buf = self._read_bytes(const.encoding_offset, const.encoding_length)[0]
+        if buf in const.encoding_names:
+            self.file_encoding = const.encoding_names[buf]
+        else:
+            self.file_encoding = f"unknown (code={buf})"
+
+        # Get platform information
+        buf = self._read_bytes(const.platform_offset, const.platform_length)
+        if buf == b"1":
+            self.platform = "unix"
+        elif buf == b"2":
+            self.platform = "windows"
+        else:
+            self.platform = "unknown"
+
+        buf = self._read_bytes(const.dataset_offset, const.dataset_length)
+        self.name = buf.rstrip(b"\x00 ")
+        if self.convert_header_text:
+            self.name = self.name.decode(self.encoding or self.default_encoding)
+
+        buf = self._read_bytes(const.file_type_offset, const.file_type_length)
+        self.file_type = buf.rstrip(b"\x00 ")
+        if self.convert_header_text:
+            self.file_type = self.file_type.decode(
+                self.encoding or self.default_encoding
+            )
+
+        # Timestamp is epoch 01/01/1960
+        epoch = datetime(1960, 1, 1)
+        x = self._read_float(
+            const.date_created_offset + align1, const.date_created_length
+        )
+        self.date_created = epoch + pd.to_timedelta(x, unit="s")
+        x = self._read_float(
+            const.date_modified_offset + align1, const.date_modified_length
+        )
+        self.date_modified = epoch + pd.to_timedelta(x, unit="s")
+
+        self.header_length = self._read_int(
+            const.header_size_offset + align1, const.header_size_length
+        )
+
+        # Read the rest of the header into cached_page.
+        buf = self._path_or_buf.read(self.header_length - 288)
+        self._cached_page += buf
+        # error: Argument 1 to "len" has incompatible type "Optional[bytes]";
+        #  expected "Sized"
+        if len(self._cached_page) != self.header_length:  # type: ignore[arg-type]
+            raise ValueError("The SAS7BDAT file appears to be truncated.")
+
+        self._page_length = self._read_int(
+            const.page_size_offset + align1, const.page_size_length
+        )
+        self._page_count = self._read_int(
+            const.page_count_offset + align1, const.page_count_length
+        )
+
+        buf = self._read_bytes(
+            const.sas_release_offset + total_align, const.sas_release_length
+        )
+        self.sas_release = buf.rstrip(b"\x00 ")
+        if self.convert_header_text:
+            self.sas_release = self.sas_release.decode(
+                self.encoding or self.default_encoding
+            )
+
+        buf = self._read_bytes(
+            const.sas_server_type_offset + total_align, const.sas_server_type_length
+        )
+        self.server_type = buf.rstrip(b"\x00 ")
+        if self.convert_header_text:
+            self.server_type = self.server_type.decode(
+                self.encoding or self.default_encoding
+            )
+
+        buf = self._read_bytes(
+            const.os_version_number_offset + total_align, const.os_version_number_length
+        )
+        self.os_version = buf.rstrip(b"\x00 ")
+        if self.convert_header_text:
+            self.os_version = self.os_version.decode(
+                self.encoding or self.default_encoding
+            )
+
+        buf = self._read_bytes(const.os_name_offset + total_align, const.os_name_length)
+        buf = buf.rstrip(b"\x00 ")
+        if len(buf) > 0:
+            self.os_name = buf.decode(self.encoding or self.default_encoding)
+        else:
+            buf = self._read_bytes(
+                const.os_maker_offset + total_align, const.os_maker_length
+            )
+            self.os_name = buf.rstrip(b"\x00 ")
+            if self.convert_header_text:
+                self.os_name = self.os_name.decode(
+                    self.encoding or self.default_encoding
+                )
+
+    def __next__(self):
+        da = self.read(nrows=self.chunksize or 1)
+        if da is None:
+            self.close()
+            raise StopIteration
+        return da
+
+    # Read a single float of the given width (4 or 8).
+    def _read_float(self, offset: int, width: int):
+        if width not in (4, 8):
+            self.close()
+            raise ValueError("invalid float width")
+        buf = self._read_bytes(offset, width)
+        fd = "f" if width == 4 else "d"
+        return struct.unpack(self.byte_order + fd, buf)[0]
+
+    # Read a single signed integer of the given width (1, 2, 4 or 8).
+    def _read_int(self, offset: int, width: int) -> int:
+        if width not in (1, 2, 4, 8):
+            self.close()
+            raise ValueError("invalid int width")
+        buf = self._read_bytes(offset, width)
+        it = {1: "b", 2: "h", 4: "l", 8: "q"}[width]
+        iv = struct.unpack(self.byte_order + it, buf)[0]
+        return iv
+
+    def _read_bytes(self, offset: int, length: int):
+        if self._cached_page is None:
+            self._path_or_buf.seek(offset)
+            buf = self._path_or_buf.read(length)
+            if len(buf) < length:
+                self.close()
+                msg = f"Unable to read {length:d} bytes from file position {offset:d}."
+                raise ValueError(msg)
+            return buf
+        else:
+            if offset + length > len(self._cached_page):
+                self.close()
+                raise ValueError("The cached page is too small.")
+            return self._cached_page[offset : offset + length]
+
+    def _parse_metadata(self) -> None:
+        done = False
+        while not done:
+            self._cached_page = self._path_or_buf.read(self._page_length)
+            if len(self._cached_page) <= 0:
+                break
+            if len(self._cached_page) != self._page_length:
+                raise ValueError("Failed to read a meta data page from the SAS file.")
+            done = self._process_page_meta()
+
+    def _process_page_meta(self) -> bool:
+        self._read_page_header()
+        pt = [const.page_meta_type, const.page_amd_type] + const.page_mix_types
+        if self._current_page_type in pt:
+            self._process_page_metadata()
+        is_data_page = self._current_page_type & const.page_data_type
+        is_mix_page = self._current_page_type in const.page_mix_types
+        return bool(
+            is_data_page
+            or is_mix_page
+            or self._current_page_data_subheader_pointers != []
+        )
+
+    def _read_page_header(self):
+        bit_offset = self._page_bit_offset
+        tx = const.page_type_offset + bit_offset
+        self._current_page_type = self._read_int(tx, const.page_type_length)
+        tx = const.block_count_offset + bit_offset
+        self._current_page_block_count = self._read_int(tx, const.block_count_length)
+        tx = const.subheader_count_offset + bit_offset
+        self._current_page_subheaders_count = self._read_int(
+            tx, const.subheader_count_length
+        )
+
+    def _process_page_metadata(self) -> None:
+        bit_offset = self._page_bit_offset
+
+        for i in range(self._current_page_subheaders_count):
+            pointer = self._process_subheader_pointers(
+                const.subheader_pointers_offset + bit_offset, i
+            )
+            if pointer.length == 0:
+                continue
+            if pointer.compression == const.truncated_subheader_id:
+                continue
+            subheader_signature = self._read_subheader_signature(pointer.offset)
+            subheader_index = self._get_subheader_index(
+                subheader_signature, pointer.compression, pointer.ptype
+            )
+            self._process_subheader(subheader_index, pointer)
+
+    def _get_subheader_index(self, signature: bytes, compression, ptype) -> int:
+        # TODO: return here could be made an enum
+        index = const.subheader_signature_to_index.get(signature)
+        if index is None:
+            f1 = (compression == const.compressed_subheader_id) or (compression == 0)
+            f2 = ptype == const.compressed_subheader_type
+            if (self.compression != b"") and f1 and f2:
+                index = const.SASIndex.data_subheader_index
+            else:
+                self.close()
+                raise ValueError("Unknown subheader signature")
+        return index
+
+    def _process_subheader_pointers(
+        self, offset: int, subheader_pointer_index: int
+    ) -> _SubheaderPointer:
+
+        subheader_pointer_length = self._subheader_pointer_length
+        total_offset = offset + subheader_pointer_length * subheader_pointer_index
+
+        subheader_offset = self._read_int(total_offset, self._int_length)
+        total_offset += self._int_length
+
+        subheader_length = self._read_int(total_offset, self._int_length)
+        total_offset += self._int_length
+
+        subheader_compression = self._read_int(total_offset, 1)
+        total_offset += 1
+
+        subheader_type = self._read_int(total_offset, 1)
+
+        x = _SubheaderPointer(
+            subheader_offset, subheader_length, subheader_compression, subheader_type
+        )
+
+        return x
+
+    def _read_subheader_signature(self, offset: int) -> bytes:
+        subheader_signature = self._read_bytes(offset, self._int_length)
+        return subheader_signature
+
+    def _process_subheader(
+        self, subheader_index: int, pointer: _SubheaderPointer
+    ) -> None:
+        offset = pointer.offset
+        length = pointer.length
+
+        if subheader_index == const.SASIndex.row_size_index:
+            processor = self._process_rowsize_subheader
+        elif subheader_index == const.SASIndex.column_size_index:
+            processor = self._process_columnsize_subheader
+        elif subheader_index == const.SASIndex.column_text_index:
+            processor = self._process_columntext_subheader
+        elif subheader_index == const.SASIndex.column_name_index:
+            processor = self._process_columnname_subheader
+        elif subheader_index == const.SASIndex.column_attributes_index:
+            processor = self._process_columnattributes_subheader
+        elif subheader_index == const.SASIndex.format_and_label_index:
+            processor = self._process_format_subheader
+        elif subheader_index == const.SASIndex.column_list_index:
+            processor = self._process_columnlist_subheader
+        elif subheader_index == const.SASIndex.subheader_counts_index:
+            processor = self._process_subheader_counts
+        elif subheader_index == const.SASIndex.data_subheader_index:
+            self._current_page_data_subheader_pointers.append(pointer)
+            return
+        else:
+            raise ValueError("unknown subheader index")
+
+        processor(offset, length)
+
+    def _process_rowsize_subheader(self, offset: int, length: int) -> None:
+
+        int_len = self._int_length
+        lcs_offset = offset
+        lcp_offset = offset
+        if self.U64:
+            lcs_offset += 682
+            lcp_offset += 706
+        else:
+            lcs_offset += 354
+            lcp_offset += 378
+
+        self.row_length = self._read_int(
+            offset + const.row_length_offset_multiplier * int_len, int_len
+        )
+        self.row_count = self._read_int(
+            offset + const.row_count_offset_multiplier * int_len, int_len
+        )
+        self.col_count_p1 = self._read_int(
+            offset + const.col_count_p1_multiplier * int_len, int_len
+        )
+        self.col_count_p2 = self._read_int(
+            offset + const.col_count_p2_multiplier * int_len, int_len
+        )
+        mx = const.row_count_on_mix_page_offset_multiplier * int_len
+        self._mix_page_row_count = self._read_int(offset + mx, int_len)
+        self._lcs = self._read_int(lcs_offset, 2)
+        self._lcp = self._read_int(lcp_offset, 2)
+
+    def _process_columnsize_subheader(self, offset: int, length: int) -> None:
+        int_len = self._int_length
+        offset += int_len
+        self.column_count = self._read_int(offset, int_len)
+        if self.col_count_p1 + self.col_count_p2 != self.column_count:
+            print(
+                f"Warning: column count mismatch ({self.col_count_p1} + "
+                f"{self.col_count_p2} != {self.column_count})\n"
+            )
+
+    # Unknown purpose
+    def _process_subheader_counts(self, offset: int, length: int) -> None:
+        """Accept a subheader-counts subheader and ignore it (no-op)."""
+        pass
+
+    def _process_columntext_subheader(self, offset: int, length: int) -> None:
+
+        offset += self._int_length
+        text_block_size = self._read_int(offset, const.text_block_size_length)
+
+        buf = self._read_bytes(offset, text_block_size)
+        cname_raw = buf[0:text_block_size].rstrip(b"\x00 ")
+        cname = cname_raw
+        if self.convert_header_text:
+            cname = cname.decode(self.encoding or self.default_encoding)
+        self.column_names_strings.append(cname)
+
+        if len(self.column_names_strings) == 1:
+            compression_literal = b""
+            for cl in const.compression_literals:
+                if cl in cname_raw:
+                    compression_literal = cl
+            self.compression = compression_literal
+            offset -= self._int_length
+
+            offset1 = offset + 16
+            if self.U64:
+                offset1 += 4
+
+            buf = self._read_bytes(offset1, self._lcp)
+            compression_literal = buf.rstrip(b"\x00")
+            if compression_literal == b"":
+                self._lcs = 0
+                offset1 = offset + 32
+                if self.U64:
+                    offset1 += 4
+                buf = self._read_bytes(offset1, self._lcp)
+                self.creator_proc = buf[0 : self._lcp]
+            elif compression_literal == const.rle_compression:
+                offset1 = offset + 40
+                if self.U64:
+                    offset1 += 4
+                buf = self._read_bytes(offset1, self._lcp)
+                self.creator_proc = buf[0 : self._lcp]
+            elif self._lcs > 0:
+                self._lcp = 0
+                offset1 = offset + 16
+                if self.U64:
+                    offset1 += 4
+                buf = self._read_bytes(offset1, self._lcs)
+                self.creator_proc = buf[0 : self._lcp]
+            if self.convert_header_text:
+                if hasattr(self, "creator_proc"):
+                    self.creator_proc = self.creator_proc.decode(
+                        self.encoding or self.default_encoding
+                    )
+
+    def _process_columnname_subheader(self, offset: int, length: int) -> None:
+        int_len = self._int_length
+        offset += int_len
+        column_name_pointers_count = (length - 2 * int_len - 12) // 8
+        for i in range(column_name_pointers_count):
+            text_subheader = (
+                offset
+                + const.column_name_pointer_length * (i + 1)
+                + const.column_name_text_subheader_offset
+            )
+            col_name_offset = (
+                offset
+                + const.column_name_pointer_length * (i + 1)
+                + const.column_name_offset_offset
+            )
+            col_name_length = (
+                offset
+                + const.column_name_pointer_length * (i + 1)
+                + const.column_name_length_offset
+            )
+
+            idx = self._read_int(
+                text_subheader, const.column_name_text_subheader_length
+            )
+            col_offset = self._read_int(
+                col_name_offset, const.column_name_offset_length
+            )
+            col_len = self._read_int(col_name_length, const.column_name_length_length)
+
+            name_str = self.column_names_strings[idx]
+            self.column_names.append(name_str[col_offset : col_offset + col_len])
+
+    def _process_columnattributes_subheader(self, offset: int, length: int) -> None:
+        int_len = self._int_length
+        column_attributes_vectors_count = (length - 2 * int_len - 12) // (int_len + 8)
+        for i in range(column_attributes_vectors_count):
+            col_data_offset = (
+                offset + int_len + const.column_data_offset_offset + i * (int_len + 8)
+            )
+            col_data_len = (
+                offset
+                + 2 * int_len
+                + const.column_data_length_offset
+                + i * (int_len + 8)
+            )
+            col_types = (
+                offset + 2 * int_len + const.column_type_offset + i * (int_len + 8)
+            )
+
+            x = self._read_int(col_data_offset, int_len)
+            self._column_data_offsets.append(x)
+
+            x = self._read_int(col_data_len, const.column_data_length_length)
+            self._column_data_lengths.append(x)
+
+            x = self._read_int(col_types, const.column_type_length)
+            self._column_types.append(b"d" if x == 1 else b"s")
+
+    def _process_columnlist_subheader(self, offset: int, length: int) -> None:
+        """Accept a column-list subheader and ignore it (no-op)."""
+        # unknown purpose
+        pass
+
+    def _process_format_subheader(self, offset: int, length: int) -> None:
+        int_len = self._int_length
+        text_subheader_format = (
+            offset + const.column_format_text_subheader_index_offset + 3 * int_len
+        )
+        col_format_offset = offset + const.column_format_offset_offset + 3 * int_len
+        col_format_len = offset + const.column_format_length_offset + 3 * int_len
+        text_subheader_label = (
+            offset + const.column_label_text_subheader_index_offset + 3 * int_len
+        )
+        col_label_offset = offset + const.column_label_offset_offset + 3 * int_len
+        col_label_len = offset + const.column_label_length_offset + 3 * int_len
+
+        x = self._read_int(
+            text_subheader_format, const.column_format_text_subheader_index_length
+        )
+        format_idx = min(x, len(self.column_names_strings) - 1)
+
+        format_start = self._read_int(
+            col_format_offset, const.column_format_offset_length
+        )
+        format_len = self._read_int(col_format_len, const.column_format_length_length)
+
+        label_idx = self._read_int(
+            text_subheader_label, const.column_label_text_subheader_index_length
+        )
+        label_idx = min(label_idx, len(self.column_names_strings) - 1)
+
+        label_start = self._read_int(col_label_offset, const.column_label_offset_length)
+        label_len = self._read_int(col_label_len, const.column_label_length_length)
+
+        label_names = self.column_names_strings[label_idx]
+        column_label = label_names[label_start : label_start + label_len]
+        format_names = self.column_names_strings[format_idx]
+        column_format = format_names[format_start : format_start + format_len]
+        current_column_number = len(self.columns)
+
+        col = _Column(
+            current_column_number,
+            self.column_names[current_column_number],
+            column_label,
+            column_format,
+            self._column_types[current_column_number],
+            self._column_data_lengths[current_column_number],
+        )
+
+        self.column_formats.append(column_format)
+        self.columns.append(col)
+
+    def read(self, nrows: int | None = None) -> DataFrame | None:
+
+        if (nrows is None) and (self.chunksize is not None):
+            nrows = self.chunksize
+        elif nrows is None:
+            nrows = self.row_count
+
+        if len(self._column_types) == 0:
+            self.close()
+            raise EmptyDataError("No columns to parse from file")
+
+        if self._current_row_in_file_index >= self.row_count:
+            return None
+
+        m = self.row_count - self._current_row_in_file_index
+        if nrows > m:
+            nrows = m
+
+        nd = self._column_types.count(b"d")
+        ns = self._column_types.count(b"s")
+
+        self._string_chunk = np.empty((ns, nrows), dtype=object)
+        self._byte_chunk = np.zeros((nd, 8 * nrows), dtype=np.uint8)
+
+        self._current_row_in_chunk_index = 0
+        p = Parser(self)
+        p.read(nrows)
+
+        rslt = self._chunk_to_dataframe()
+        if self.index is not None:
+            rslt = rslt.set_index(self.index)
+
+        return rslt
+
+    def _read_next_page(self):
+        self._current_page_data_subheader_pointers = []
+        self._cached_page = self._path_or_buf.read(self._page_length)
+        if len(self._cached_page) <= 0:
+            return True
+        elif len(self._cached_page) != self._page_length:
+            self.close()
+            msg = (
+                "failed to read complete page from file (read "
+                f"{len(self._cached_page):d} of {self._page_length:d} bytes)"
+            )
+            raise ValueError(msg)
+
+        self._read_page_header()
+        page_type = self._current_page_type
+        if page_type == const.page_meta_type:
+            self._process_page_metadata()
+
+        is_data_page = page_type & const.page_data_type
+        pt = [const.page_meta_type] + const.page_mix_types
+        if not is_data_page and self._current_page_type not in pt:
+            return self._read_next_page()
+
+        return False
+
+    def _chunk_to_dataframe(self) -> DataFrame:
+
+        n = self._current_row_in_chunk_index
+        m = self._current_row_in_file_index
+        ix = range(m - n, m)
+        rslt = {}
+
+        js, jb = 0, 0
+        for j in range(self.column_count):
+
+            name = self.column_names[j]
+
+            if self._column_types[j] == b"d":
+                col_arr = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d")
+                rslt[name] = pd.Series(col_arr, dtype=np.float64, index=ix)
+                if self.convert_dates:
+                    if self.column_formats[j] in const.sas_date_formats:
+                        rslt[name] = _convert_datetimes(rslt[name], "d")
+                    elif self.column_formats[j] in const.sas_datetime_formats:
+                        rslt[name] = _convert_datetimes(rslt[name], "s")
+                jb += 1
+            elif self._column_types[j] == b"s":
+                rslt[name] = pd.Series(self._string_chunk[js, :], index=ix)
+                if self.convert_text and (self.encoding is not None):
+                    rslt[name] = rslt[name].str.decode(
+                        self.encoding or self.default_encoding
+                    )
+                if self.blank_missing:
+                    ii = rslt[name].str.len() == 0
+                    rslt[name][ii] = np.nan
+                js += 1
+            else:
+                self.close()
+                raise ValueError(f"unknown column type {repr(self._column_types[j])}")
+
+        df = DataFrame(rslt, columns=self.column_names, index=ix, copy=False)
+        return df
--- a/.venv/Lib/site-packages/pandas/io/sas/sas_constants.py
+++ b/.venv/Lib/site-packages/pandas/io/sas/sas_constants.py
@@ -0,0 +1,253 @@
+# 32-byte sequence; presumably the signature expected at the start of a
+# sas7bdat file -- confirm against the reader's header check.
+magic = (
+    b"\x00\x00\x00\x00\x00\x00\x00\x00"
+    + b"\x00\x00\x00\x00\xc2\xea\x81\x60"
+    + b"\xb3\x14\x11\xcf\xbd\x92\x08\x00"
+    + b"\x09\xc7\x31\x8c\x18\x1f\x10\x11"
+)
+
+# File header layout: ``*_offset`` / ``*_length`` pairs locate a field
+# (presumably in bytes from the start of the header -- confirm in reader).
+align_1_checker_value = b"3"
+align_1_offset = 32
+align_1_length = 1
+align_1_value = 4
+u64_byte_checker_value = b"3"
+align_2_offset = 35
+align_2_length = 1
+align_2_value = 4
+endianness_offset = 37
+endianness_length = 1
+platform_offset = 39
+platform_length = 1
+encoding_offset = 70
+encoding_length = 1
+dataset_offset = 92
+dataset_length = 64
+file_type_offset = 156
+file_type_length = 8
+date_created_offset = 164
+date_created_length = 8
+date_modified_offset = 172
+date_modified_length = 8
+header_size_offset = 196
+header_size_length = 4
+page_size_offset = 200
+page_size_length = 4
+page_count_offset = 204
+page_count_length = 4
+sas_release_offset = 216
+sas_release_length = 8
+sas_server_type_offset = 224
+sas_server_type_length = 16
+os_version_number_offset = 240
+os_version_number_length = 16
+os_maker_offset = 256
+os_maker_length = 16
+os_name_offset = 272
+os_name_length = 16
+# Page layout: values with an x86 and an x64 variant, then the page header
+# fields read by the reader's ``_read_page_header``.
+page_bit_offset_x86 = 16
+page_bit_offset_x64 = 32
+subheader_pointer_length_x86 = 12
+subheader_pointer_length_x64 = 24
+page_type_offset = 0
+page_type_length = 2
+block_count_offset = 2
+block_count_length = 2
+subheader_count_offset = 4
+subheader_count_length = 2
+# Page type codes; ``page_data_type`` is tested with a bitwise AND by the
+# reader, the others by equality or membership.
+page_meta_type = 0
+page_data_type = 256
+page_amd_type = 1024
+page_metc_type = 16384
+page_comp_type = -28672
+page_mix_types = [512, 640]
+# Subheader pointer fields and flag values.
+subheader_pointers_offset = 8
+truncated_subheader_id = 1
+compressed_subheader_id = 4
+compressed_subheader_type = 1
+text_block_size_length = 2
+# Row-size subheader: multipliers applied to the integer width to locate
+# each field.
+row_length_offset_multiplier = 5
+row_count_offset_multiplier = 6
+col_count_p1_multiplier = 9
+col_count_p2_multiplier = 10
+row_count_on_mix_page_offset_multiplier = 15
+# Column-name pointer fields.
+column_name_pointer_length = 8
+column_name_text_subheader_offset = 0
+column_name_text_subheader_length = 2
+column_name_offset_offset = 2
+column_name_offset_length = 2
+column_name_length_offset = 4
+column_name_length_length = 2
+# Column-attributes vector fields.
+column_data_offset_offset = 8
+column_data_length_offset = 8
+column_data_length_length = 4
+column_type_offset = 14
+column_type_length = 1
+# Format-and-label subheader fields.
+column_format_text_subheader_index_offset = 22
+column_format_text_subheader_index_length = 2
+column_format_offset_offset = 24
+column_format_offset_length = 2
+column_format_length_offset = 26
+column_format_length_length = 2
+column_label_text_subheader_index_offset = 28
+column_label_text_subheader_index_length = 2
+column_label_offset_offset = 30
+column_label_offset_length = 2
+column_label_length_offset = 32
+column_label_length_length = 2
+# Literals searched for in the first column-text block to detect the
+# compression scheme of the file.
+rle_compression = b"SASYZCRL"
+rdc_compression = b"SASYZCR2"
+
+compression_literals = [rle_compression, rdc_compression]
+
+# Incomplete list of encodings, using SAS nomenclature:
+# http://support.sas.com/documentation/cdl/en/nlsref/61893/HTML/default/viewer.htm#a002607278.htm
+# Keys are integer codes (presumably the byte at ``encoding_offset`` --
+# confirm in reader); values are the SAS encoding names.
+encoding_names = {
+    29: "latin1",
+    20: "utf-8",
+    33: "cyrillic",
+    60: "wlatin2",
+    61: "wcyrillic",
+    62: "wlatin1",
+    90: "ebcdic870",
+}
+
+
+class SASIndex:
+    """
+    Integer identifiers for the kinds of subheader the reader dispatches
+    on; these are the values of ``subheader_signature_to_index``.
+    """
+
+    row_size_index = 0
+    column_size_index = 1
+    subheader_counts_index = 2
+    column_text_index = 3
+    column_name_index = 4
+    column_attributes_index = 5
+    format_and_label_index = 6
+    column_list_index = 7
+    # Not present in subheader_signature_to_index; the reader assigns it to
+    # unknown signatures in compressed files.
+    data_subheader_index = 8
+
+
+# Maps a subheader signature (4 or 8 raw bytes) to its SASIndex value.
+# Several byte patterns map to the same index; presumably these cover the
+# 32/64-bit and byte-order variants of each signature -- confirm.
+subheader_signature_to_index = {
+    b"\xF7\xF7\xF7\xF7": SASIndex.row_size_index,
+    b"\x00\x00\x00\x00\xF7\xF7\xF7\xF7": SASIndex.row_size_index,
+    b"\xF7\xF7\xF7\xF7\x00\x00\x00\x00": SASIndex.row_size_index,
+    b"\xF7\xF7\xF7\xF7\xFF\xFF\xFB\xFE": SASIndex.row_size_index,
+    b"\xF6\xF6\xF6\xF6": SASIndex.column_size_index,
+    b"\x00\x00\x00\x00\xF6\xF6\xF6\xF6": SASIndex.column_size_index,
+    b"\xF6\xF6\xF6\xF6\x00\x00\x00\x00": SASIndex.column_size_index,
+    b"\xF6\xF6\xF6\xF6\xFF\xFF\xFB\xFE": SASIndex.column_size_index,
+    b"\x00\xFC\xFF\xFF": SASIndex.subheader_counts_index,
+    b"\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index,
+    b"\x00\xFC\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.subheader_counts_index,
+    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index,
+    b"\xFD\xFF\xFF\xFF": SASIndex.column_text_index,
+    b"\xFF\xFF\xFF\xFD": SASIndex.column_text_index,
+    b"\xFD\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_text_index,
+    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD": SASIndex.column_text_index,
+    b"\xFF\xFF\xFF\xFF": SASIndex.column_name_index,
+    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_name_index,
+    b"\xFC\xFF\xFF\xFF": SASIndex.column_attributes_index,
+    b"\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index,
+    b"\xFC\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_attributes_index,
+    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index,
+    b"\xFE\xFB\xFF\xFF": SASIndex.format_and_label_index,
+    b"\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index,
+    b"\xFE\xFB\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.format_and_label_index,
+    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index,
+    b"\xFE\xFF\xFF\xFF": SASIndex.column_list_index,
+    b"\xFF\xFF\xFF\xFE": SASIndex.column_list_index,
+    b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_list_index,
+    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": SASIndex.column_list_index,
+}
+
+
+# List of frequently used SAS date and datetime formats
+# http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm
+# https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java
+# Format names for which the reader converts a numeric column with
+# ``_convert_datetimes(..., "d")`` when date conversion is enabled.
+sas_date_formats = (
+    "DATE",
+    "DAY",
+    "DDMMYY",
+    "DOWNAME",
+    "JULDAY",
+    "JULIAN",
+    "MMDDYY",
+    "MMYY",
+    "MMYYC",
+    "MMYYD",
+    "MMYYP",
+    "MMYYS",
+    "MMYYN",
+    "MONNAME",
+    "MONTH",
+    "MONYY",
+    "QTR",
+    "QTRR",
+    "NENGO",
+    "WEEKDATE",
+    "WEEKDATX",
+    "WEEKDAY",
+    "WEEKV",
+    "WORDDATE",
+    "WORDDATX",
+    "YEAR",
+    "YYMM",
+    "YYMMC",
+    "YYMMD",
+    "YYMMP",
+    "YYMMS",
+    "YYMMN",
+    "YYMON",
+    "YYMMDD",
+    "YYQ",
+    "YYQC",
+    "YYQD",
+    "YYQP",
+    "YYQS",
+    "YYQN",
+    "YYQR",
+    "YYQRC",
+    "YYQRD",
+    "YYQRP",
+    "YYQRS",
+    "YYQRN",
+    "YYMMDDP",
+    "YYMMDDC",
+    "E8601DA",
+    "YYMMDDN",
+    "MMDDYYC",
+    "MMDDYYS",
+    "MMDDYYD",
+    "YYMMDDS",
+    "B8601DA",
+    "DDMMYYN",
+    "YYMMDDD",
+    "DDMMYYB",
+    "DDMMYYP",
+    "MMDDYYP",
+    "YYMMDDB",
+    "MMDDYYN",
+    "DDMMYYC",
+    "DDMMYYD",
+    "DDMMYYS",
+    "MINGUO",
+)
+
+sas_datetime_formats = (
+    "DATETIME",
+    "DTWKDATX",
+    "B8601DN",
+    "B8601DT",
+    "B8601DX",
+    "B8601DZ",
+    "B8601LX",
+    "E8601DN",
+    "E8601DT",
+    "E8601DX",
+    "E8601DZ",
+    "E8601LX",
+    "DATEAMPM",
+    "DTDATE",
+    "DTMONYY",
+    "DTMONYY",
+    "DTWKDATX",
+    "DTYEAR",
+    "TOD",
+    "MDYAMPM",
+)
--- a/.venv/Lib/site-packages/pandas/io/sas/sas_xport.py
+++ b/.venv/Lib/site-packages/pandas/io/sas/sas_xport.py
@@ -0,0 +1,496 @@
+"""
+Read a SAS XPort format file into a Pandas DataFrame.
+
+Based on code from Jack Cushman (github.com/jcushman/xport).
+
+The file format is defined here:
+
+https://support.sas.com/content/dam/SAS/support/en/technical-papers/record-layout-of-a-sas-version-5-or-6-data-set-in-sas-transport-xport-format.pdf
+"""
+from __future__ import annotations
+
+from collections import abc
+from datetime import datetime
+import struct
+import warnings
+
+import numpy as np
+
+from pandas._typing import (
+    FilePath,
+    ReadBuffer,
+)
+from pandas.util._decorators import Appender
+
+import pandas as pd
+
+from pandas.io.common import get_handle
+from pandas.io.sas.sasreader import ReaderBase
+
+# Expected text of the 80-character header records; XportReader._read_header
+# compares the rows it reads against these.
+# Library header: must equal the first row of the file.
+_correct_line1 = (
+    "HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!"
+    "000000000000000000000000000000  "
+)
+# Member header: only a prefix, matched with ``startswith``; the reader takes
+# the field-descriptor length from the characters that follow it.
+_correct_header1 = (
+    "HEADER RECORD*******MEMBER  HEADER RECORD!!!!!!!000000000000000001600000000"
+)
+# Descriptor header: must equal the row following the member header.
+_correct_header2 = (
+    "HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!"
+    "000000000000000000000000000000  "
+)
+# Observation header: must equal the row following the field descriptors.
+_correct_obs_header = (
+    "HEADER RECORD*******OBS     HEADER RECORD!!!!!!!"
+    "000000000000000000000000000000  "
+)
+# Names zipped, in order, with the values unpacked from each field
+# descriptor record; the "_" entry is dropped by the reader.
+_fieldkeys = [
+    "ntype",
+    "nhfun",
+    "field_length",
+    "nvar0",
+    "name",
+    "label",
+    "nform",
+    "nfl",
+    "num_decimals",
+    "nfj",
+    "nfill",
+    "niform",
+    "nifl",
+    "nifd",
+    "npos",
+    "_",
+]
+
+
+_base_params_doc = """\
+Parameters
+----------
+filepath_or_buffer : str or file-like object
+    Path to SAS file or object implementing binary read method."""
+
+_params2_doc = """\
+index : identifier of index column
+    Identifier of column that should be used as index of the DataFrame.
+encoding : str
+    Encoding for text data.
+chunksize : int
+    Read file `chunksize` lines at a time, returns iterator."""
+
+_format_params_doc = """\
+format : str
+    File format, only `xport` is currently supported."""
+
+_iterator_doc = """\
+iterator : bool, default False
+    Return XportReader object for reading file incrementally."""
+
+
+_read_sas_doc = f"""Read a SAS file into a DataFrame.
+
+{_base_params_doc}
+{_format_params_doc}
+{_params2_doc}
+{_iterator_doc}
+
+Returns
+-------
+DataFrame or XportReader
+
+Examples
+--------
+Read a SAS Xport file:
+
+>>> df = pd.read_sas('filename.XPT')
+
+Read a Xport file in 10,000 line chunks:
+
+>>> itr = pd.read_sas('filename.XPT', chunksize=10000)
+>>> for chunk in itr:
+>>>     do_something(chunk)
+
+"""
+
+_xport_reader_doc = f"""\
+Class for reading SAS Xport files.
+
+{_base_params_doc}
+{_params2_doc}
+
+Attributes
+----------
+member_info : list
+    Contains information about the file
+fields : list
+    Contains information about the variables in the file
+"""
+
+_read_method_doc = """\
+Read observations from SAS Xport file, returning as data frame.
+
+Parameters
+----------
+nrows : int
+    Number of rows to read from data file; if None, read whole
+    file.
+
+Returns
+-------
+A DataFrame.
+"""
+
+
+def _parse_date(datestr: str) -> datetime:
+    """Given a date in xport format, return Python date."""
+    try:
+        # e.g. "16FEB11:10:07:55"
+        return datetime.strptime(datestr, "%d%b%y:%H:%M:%S")
+    except ValueError:
+        return pd.NaT
+
+
+def _split_line(s: str, parts):
+    """
+    Parameters
+    ----------
+    s: str
+        Fixed-length string to split
+    parts: list of (name, length) pairs
+        Used to break up string, name '_' will be filtered from output.
+
+    Returns
+    -------
+    Dict of name:contents of string at given location.
+    """
+    out = {}
+    start = 0
+    for name, length in parts:
+        out[name] = s[start : start + length].strip()
+        start += length
+    del out["_"]
+    return out
+
+
+def _handle_truncated_float_vec(vec, nbytes):
+    # This feature is not well documented, but some SAS XPORT files
+    # have 2-7 byte "truncated" floats.  To read these truncated
+    # floats, pad them with zeros on the right to make 8 byte floats.
+    #
+    # References:
+    # https://github.com/jcushman/xport/pull/3
+    # The R "foreign" library
+
+    if nbytes != 8:
+        vec1 = np.zeros(len(vec), np.dtype("S8"))
+        dtype = np.dtype(f"S{nbytes},S{8 - nbytes}")
+        vec2 = vec1.view(dtype=dtype)
+        vec2["f0"] = vec
+        return vec2
+
+    return vec
+
+
+def _parse_float_vec(vec):
+    """
+    Parse a vector of float values representing IBM 8 byte floats into
+    native 8 byte floats.
+    """
+    dtype = np.dtype(">u4,>u4")
+    vec1 = vec.view(dtype=dtype)
+    xport1 = vec1["f0"]
+    xport2 = vec1["f1"]
+
+    # Start by setting first half of ieee number to first half of IBM
+    # number sans exponent
+    ieee1 = xport1 & 0x00FFFFFF
+
+    # The fraction bit to the left of the binary point in the ieee
+    # format was set and the number was shifted 0, 1, 2, or 3
+    # places. This will tell us how to adjust the ibm exponent to be a
+    # power of 2 ieee exponent and how to shift the fraction bits to
+    # restore the correct magnitude.
+    shift = np.zeros(len(vec), dtype=np.uint8)
+    shift[np.where(xport1 & 0x00200000)] = 1
+    shift[np.where(xport1 & 0x00400000)] = 2
+    shift[np.where(xport1 & 0x00800000)] = 3
+
+    # shift the ieee number down the correct number of places then
+    # set the second half of the ieee number to be the second half
+    # of the ibm number shifted appropriately, ored with the bits
+    # from the first half that would have been shifted in if we
+    # could shift a double. All we are worried about are the low
+    # order 3 bits of the first half since we're only shifting by
+    # 1, 2, or 3.
+    ieee1 >>= shift
+    ieee2 = (xport2 >> shift) | ((xport1 & 0x00000007) << (29 + (3 - shift)))
+
+    # clear the 1 bit to the left of the binary point
+    ieee1 &= 0xFFEFFFFF
+
+    # set the exponent of the ieee number to be the actual exponent
+    # plus the shift count + 1023. Or this into the first half of the
+    # ieee number. The ibm exponent is excess 64 but is adjusted by 65
+    # since during conversion to ibm format the exponent is
+    # incremented by 1 and the fraction bits left 4 positions to the
+    # right of the radix point.  (had to add >> 24 because C treats &
+    # 0x7f as 0x7f000000 and Python doesn't)
+    ieee1 |= ((((((xport1 >> 24) & 0x7F) - 65) << 2) + shift + 1023) << 20) | (
+        xport1 & 0x80000000
+    )
+
+    ieee = np.empty((len(ieee1),), dtype=">u4,>u4")
+    ieee["f0"] = ieee1
+    ieee["f1"] = ieee2
+    ieee = ieee.view(dtype=">f8")
+    ieee = ieee.astype("f8")
+
+    return ieee
+
+
+class XportReader(ReaderBase, abc.Iterator):
+    __doc__ = _xport_reader_doc
+
+    def __init__(
+        self,
+        filepath_or_buffer: FilePath | ReadBuffer[bytes],
+        index=None,
+        encoding: str | None = "ISO-8859-1",
+        chunksize=None,
+    ):
+
+        self._encoding = encoding
+        self._lines_read = 0
+        self._index = index
+        self._chunksize = chunksize
+
+        self.handles = get_handle(
+            filepath_or_buffer, "rb", encoding=encoding, is_text=False
+        )
+        self.filepath_or_buffer = self.handles.handle
+
+        try:
+            self._read_header()
+        except Exception:
+            self.close()
+            raise
+
+    def close(self):
+        """Close the handles opened by ``get_handle`` in ``__init__``."""
+        self.handles.close()
+
+    def _get_row(self):
+        return self.filepath_or_buffer.read(80).decode()
+
+    def _read_header(self):
+        # Parse the header section at the start of the file and populate
+        # file_info, member_info, fields, record_length, record_start, nobs,
+        # columns and _dtype on the reader.
+        #
+        # Every header record is fetched with _get_row(), i.e. as an 80-byte
+        # chunk decoded to text.  The records are consumed strictly in order,
+        # so the statement order below must not be changed.
+        #
+        # Raises ValueError when an expected header record does not match and
+        # TypeError when a numeric field has a width outside 2..8 bytes.
+        self.filepath_or_buffer.seek(0)
+
+        # read file header
+        line1 = self._get_row()
+        if line1 != _correct_line1:
+            if "**COMPRESSED**" in line1:
+                # this was created with the PROC CPORT method and can't be read
+                # https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/movefile/p1bm6aqp3fw4uin1hucwh718f6kp.htm
+                raise ValueError(
+                    "Header record indicates a CPORT file, which is not readable."
+                )
+            raise ValueError("Header record is not an XPORT file.")
+
+        line2 = self._get_row()
+        # [name, width] pairs; the widths add up to the 80 characters of a row.
+        # NOTE(review): presumably _split_line cuts the row into these
+        # fixed-width pieces keyed by name -- confirm against _split_line.
+        fif = [["prefix", 24], ["version", 8], ["OS", 8], ["_", 24], ["created", 16]]
+        file_info = _split_line(line2, fif)
+        if file_info["prefix"] != "SAS     SAS     SASLIB":
+            raise ValueError("Header record has invalid prefix.")
+        file_info["created"] = _parse_date(file_info["created"])
+        self.file_info = file_info
+
+        # The third row only contributes its first 16 characters (the
+        # modification stamp).  file_info is the same dict object already
+        # stored on self, so this update is visible through self.file_info.
+        line3 = self._get_row()
+        file_info["modified"] = _parse_date(line3[:16])
+
+        # read member header
+        header1 = self._get_row()
+        header2 = self._get_row()
+        headflag1 = header1.startswith(_correct_header1)
+        headflag2 = header2 == _correct_header2
+        if not (headflag1 and headflag2):
+            raise ValueError("Member header not found")
+        # usually 140, could be 135
+        fieldnamelength = int(header1[-5:-2])
+
+        # member info
+        mem = [
+            ["prefix", 8],
+            ["set_name", 8],
+            ["sasdata", 8],
+            ["version", 8],
+            ["OS", 8],
+            ["_", 24],
+            ["created", 16],
+        ]
+        member_info = _split_line(self._get_row(), mem)
+        # The member information continues on the following row with a
+        # different layout; both rows are merged into one dict.
+        mem = [["modified", 16], ["_", 16], ["label", 40], ["type", 8]]
+        member_info.update(_split_line(self._get_row(), mem))
+        member_info["modified"] = _parse_date(member_info["modified"])
+        member_info["created"] = _parse_date(member_info["created"])
+        self.member_info = member_info
+
+        # read field names
+        types = {1: "numeric", 2: "char"}
+        # Characters 54..57 of the next row hold the number of fields.
+        fieldcount = int(self._get_row()[54:58])
+        datalength = fieldnamelength * fieldcount
+        # round up to nearest 80
+        if datalength % 80:
+            datalength += 80 - datalength % 80
+        # Field descriptors are read as raw bytes (not via _get_row) because
+        # they are unpacked with struct below.
+        fielddata = self.filepath_or_buffer.read(datalength)
+        fields = []
+        obs_length = 0
+        # Consume one descriptor of fieldnamelength bytes per iteration; any
+        # shorter remainder (the padding up to a multiple of 80) is ignored.
+        while len(fielddata) >= fieldnamelength:
+            # pull data for one field
+            fieldbytes, fielddata = (
+                fielddata[:fieldnamelength],
+                fielddata[fieldnamelength:],
+            )
+
+            # rest at end gets ignored, so if field is short, pad out
+            # to match struct pattern below
+            fieldbytes = fieldbytes.ljust(140)
+
+            # Big-endian layout describing exactly 140 bytes.
+            fieldstruct = struct.unpack(">hhhh8s40s8shhh2s8shhl52s", fieldbytes)
+            field = dict(zip(_fieldkeys, fieldstruct))
+            del field["_"]
+            # An ntype code other than 1 or 2 raises KeyError here.
+            field["ntype"] = types[field["ntype"]]
+            fl = field["field_length"]
+            if field["ntype"] == "numeric" and ((fl < 2) or (fl > 8)):
+                msg = f"Floating field width {fl} is not between 2 and 8."
+                raise TypeError(msg)
+
+            # Strip surrounding whitespace from every value that supports
+            # .strip(); values without that method (the integers) are kept
+            # unchanged.  Only values are replaced, so mutating the dict
+            # while iterating over it is safe.
+            for k, v in field.items():
+                try:
+                    field[k] = v.strip()
+                except AttributeError:
+                    pass
+
+            # One observation is the concatenation of all fields.
+            obs_length += field["field_length"]
+            fields += [field]
+
+        header = self._get_row()
+        if not header == _correct_obs_header:
+            raise ValueError("Observation header not found.")
+
+        self.fields = fields
+        self.record_length = obs_length
+        # Observations start right after the observation header row.
+        self.record_start = self.filepath_or_buffer.tell()
+
+        # _record_count() reads record_start and record_length, so it has to
+        # run after both have been assigned.
+        self.nobs = self._record_count()
+        self.columns = [x["name"].decode() for x in self.fields]
+
+        # Setup the dtype.
+        # One fixed-width bytes field per column, named "s0", "s1", ...
+        dtypel = [
+            ("s" + str(i), "S" + str(field["field_length"]))
+            for i, field in enumerate(self.fields)
+        ]
+        dtype = np.dtype(dtypel)
+        self._dtype = dtype
+
+    def __next__(self):
+        """Return the next chunk: ``_chunksize`` rows, or one row if it is unset."""
+        chunk_rows = self._chunksize
+        if not chunk_rows:
+            chunk_rows = 1
+        return self.read(nrows=chunk_rows)
+
+    def _record_count(self) -> int:
+        """
+        Count the observation records stored after the header.
+
+        The size of the data section is measured by seeking to the end of
+        the file, which may be suboptimal.
+
+        Side effect: leaves the file position at record_start.
+        """
+        handle = self.filepath_or_buffer
+        handle.seek(0, 2)
+        total_records_length = handle.tell() - self.record_start
+
+        if total_records_length % 80 != 0:
+            warnings.warn("xport file may be corrupted.")
+
+        if self.record_length > 80:
+            # Records wider than one 80-byte card: no tail padding is
+            # subtracted.
+            tail_pad = 0
+        else:
+            # View the last 80-byte card as 8-byte words and count the words
+            # made up solely of ASCII blanks (0x2020202020202020); each one
+            # is treated as 8 bytes of padding.
+            handle.seek(-80, 2)
+            last_card = np.frombuffer(handle.read(80), dtype=np.uint64)
+            blank_words = np.flatnonzero(last_card == 2314885530818453536)
+            tail_pad = 8 * len(blank_words)
+
+        handle.seek(self.record_start)
+        return (total_records_length - tail_pad) // self.record_length
+
+    def get_chunk(self, size=None):
+        """
+        Read the next block of lines from the Xport file as a dataframe.
+
+        Parameters
+        ----------
+        size : int, defaults to None
+            Number of lines to read.  If None, the reader's ``_chunksize``
+            is passed to ``read`` instead.
+
+        Returns
+        -------
+        DataFrame
+        """
+        nrows = self._chunksize if size is None else size
+        return self.read(nrows=nrows)
+
+    def _missing_double(self, vec):
+        """
+        Return a boolean mask flagging the entries of ``vec`` that hold a
+        missing-value marker.
+
+        Each 8-byte item is flagged when its first byte is an ASCII capital
+        letter (0x41-0x5A), an underscore (0x5F) or a period (0x2E) and the
+        remaining seven bytes are all zero.
+        """
+        parts = vec.view(dtype="u1,u1,u2,u4")
+        lead = parts["f0"]
+        tail_is_zero = (parts["f1"] == 0) & (parts["f2"] == 0) & (parts["f3"] == 0)
+        is_marker = ((lead >= 0x41) & (lead <= 0x5A)) | (lead == 0x5F) | (lead == 0x2E)
+        return tail_is_zero & is_marker
+
+    @Appender(_read_method_doc)
+    def read(self, nrows=None):
+        # No docstring is written here on purpose: the decorator above is
+        # applied with _read_method_doc and takes care of the documentation.
+        if nrows is None:
+            nrows = self.nobs
+
+        # Never read past the last observation of the file.
+        read_lines = min(nrows, self.nobs - self._lines_read)
+        read_len = read_lines * self.record_length
+        if read_len <= 0:
+            # Nothing left: release the handles and end the iteration.
+            self.close()
+            raise StopIteration
+
+        raw = self.filepath_or_buffer.read(read_len)
+        data = np.frombuffer(raw, dtype=self._dtype, count=read_lines)
+
+        df = pd.DataFrame(index=range(read_lines))
+        for j, x in enumerate(self.columns):
+            field = self.fields[j]
+            vec = data[f"s{j}"]
+            ntype = field["ntype"]
+            if ntype == "numeric":
+                vec = _handle_truncated_float_vec(vec, field["field_length"])
+                # The missing mask is computed on the raw bytes before they
+                # are converted to floats.
+                miss = self._missing_double(vec)
+                v = _parse_float_vec(vec)
+                v[miss] = np.nan
+            elif ntype == "char":
+                v = [y.rstrip() for y in vec]
+
+                # Without an encoding the values stay as bytes.
+                if self._encoding is not None:
+                    v = [y.decode(self._encoding) for y in v]
+
+            df[x] = v
+
+        if self._index is None:
+            # Number the rows continuously across successive reads.
+            first = self._lines_read
+            df.index = pd.Index(range(first, first + read_lines))
+        else:
+            df = df.set_index(self._index)
+
+        self._lines_read += read_lines
+
+        return df
--- a/.venv/Lib/site-packages/pandas/io/sas/sasreader.py
+++ b/.venv/Lib/site-packages/pandas/io/sas/sasreader.py
@@ -0,0 +1,158 @@
+"""
+Read SAS sas7bdat or xport files.
+"""
+from __future__ import annotations
+
+from abc import (
+    ABCMeta,
+    abstractmethod,
+)
+from typing import (
+    TYPE_CHECKING,
+    Hashable,
+    overload,
+)
+
+from pandas._typing import (
+    FilePath,
+    ReadBuffer,
+)
+
+from pandas.io.common import stringify_path
+
+if TYPE_CHECKING:
+    from pandas import DataFrame
+
+
+# TODO(PY38): replace with Protocol in Python 3.8
+class ReaderBase(metaclass=ABCMeta):
+    """
+    Abstract interface for the XportReader and SAS7BDATReader classes.
+
+    Subclasses must implement ``read`` and ``close``.  Instances work as
+    context managers that call ``close`` when the ``with`` block is left.
+    """
+
+    @abstractmethod
+    def read(self, nrows=None):
+        """Read data from the file; must be implemented by subclasses."""
+
+    @abstractmethod
+    def close(self):
+        """Close the reader; must be implemented by subclasses."""
+
+    def __enter__(self):
+        # The reader itself is what the ``with`` statement binds.
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        # Always close; returning None lets a pending exception propagate.
+        self.close()
+
+
+# Typing-only overload: with an integer ``chunksize`` the call is annotated
+# as returning a reader object.
+@overload
+def read_sas(
+    filepath_or_buffer: FilePath | ReadBuffer[bytes],
+    format: str | None = ...,
+    index: Hashable | None = ...,
+    encoding: str | None = ...,
+    chunksize: int = ...,
+    iterator: bool = ...,
+) -> ReaderBase:
+    ...
+
+
+# Typing-only overload: with ``chunksize=None`` the call is annotated as
+# returning either a DataFrame or a reader object.
+@overload
+def read_sas(
+    filepath_or_buffer: FilePath | ReadBuffer[bytes],
+    format: str | None = ...,
+    index: Hashable | None = ...,
+    encoding: str | None = ...,
+    chunksize: None = ...,
+    iterator: bool = ...,
+) -> DataFrame | ReaderBase:
+    ...
+
+
+def read_sas(
+    filepath_or_buffer: FilePath | ReadBuffer[bytes],
+    format: str | None = None,
+    index: Hashable | None = None,
+    encoding: str | None = None,
+    chunksize: int | None = None,
+    iterator: bool = False,
+) -> DataFrame | ReaderBase:
+    """
+    Read a SAS file stored in either XPORT or SAS7BDAT format.
+
+    Parameters
+    ----------
+    filepath_or_buffer : str, path object, or file-like object
+        String, path object (implementing ``os.PathLike[str]``), or file-like
+        object implementing a binary ``read()`` function. The string could be a URL.
+        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
+        expected. A local file could be:
+        ``file://localhost/path/to/table.sas``.
+    format : str {'xport', 'sas7bdat'} or None
+        Format of the file; the value is matched case-insensitively. If None,
+        the format is inferred from the file extension (``.xpt`` or
+        ``.sas7bdat``), which requires a path rather than a buffer.
+    index : identifier of index column, defaults to None
+        Identifier of column that should be used as index of the DataFrame.
+    encoding : str, default is None
+        Encoding for text data.  If None, text data are stored as raw bytes.
+    chunksize : int
+        Read file `chunksize` lines at a time, returns iterator.
+
+        .. versionchanged:: 1.2
+
+            The returned reader is a context manager.
+    iterator : bool, defaults to False
+        If True, returns an iterator for reading the file incrementally.
+
+        .. versionchanged:: 1.2
+
+            The returned reader is a context manager.
+
+    Returns
+    -------
+    DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
+    or XportReader
+
+    Raises
+    ------
+    ValueError
+        If the format is not given and cannot be inferred, or if the given
+        format is neither 'xport' nor 'sas7bdat'.
+    """
+    if format is None:
+        # Only a file name carries an extension to infer the format from.
+        filepath_or_buffer = stringify_path(filepath_or_buffer)
+        if not isinstance(filepath_or_buffer, str):
+            raise ValueError(
+                "If this is a buffer object rather "
+                "than a string name, you must specify a format string"
+            )
+        lowered_name = filepath_or_buffer.lower()
+        if lowered_name.endswith(".xpt"):
+            format = "xport"
+        elif lowered_name.endswith(".sas7bdat"):
+            format = "sas7bdat"
+        else:
+            raise ValueError("unable to infer format of SAS file")
+
+    kind = format.lower()
+    # Both reader classes take the same keyword options.
+    options = {"index": index, "encoding": encoding, "chunksize": chunksize}
+
+    reader: ReaderBase
+    if kind == "xport":
+        from pandas.io.sas.sas_xport import XportReader
+
+        reader = XportReader(filepath_or_buffer, **options)
+    elif kind == "sas7bdat":
+        from pandas.io.sas.sas7bdat import SAS7BDATReader
+
+        reader = SAS7BDATReader(filepath_or_buffer, **options)
+    else:
+        raise ValueError("unknown SAS format")
+
+    if not (iterator or chunksize):
+        # One-shot read: the reader is closed as soon as the data is loaded.
+        with reader:
+            return reader.read()
+
+    # Incremental reading: hand the open reader to the caller.
+    return reader
				`@@ -0,0 +1 @@`
				`from pandas.io.sas.sasreader import read_sas # noqa:F401`