first commit

This commit is contained in:
Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@@ -0,0 +1,20 @@
# Copyright 2018-2022 Streamlit Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .local_sources_watcher import LocalSourcesWatcher as LocalSourcesWatcher
from .path_watcher import (
report_watchdog_availability as report_watchdog_availability,
watch_dir as watch_dir,
watch_file as watch_file,
)

View File

@@ -0,0 +1,373 @@
# Copyright 2018-2022 Streamlit Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Declares the EventBasedPathWatcher class, which watches given paths in the file system.
How these classes work together
-------------------------------
- EventBasedPathWatcher : each instance of this is able to watch a single
file or directory at a given path so long as there's a browser interested in
it. This uses _MultiPathWatcher to watch paths.
- _MultiPathWatcher : singleton that watches multiple paths. It does this by
holding a watchdog.observer.Observer object, and manages several
_FolderEventHandler instances. This creates _FolderEventHandlers as needed,
if the required folder is not already being watched. It also tells
existing _FolderEventHandlers which paths they should be watching.
- _FolderEventHandler : event handler for when a folder is modified. You can
register paths in that folder that you're interested in. Then this object
listens to folder events, sees if registered paths changed, and fires
callbacks if so.
"""
import os
import threading
from typing import Callable, cast, Dict, Optional
from blinker import Signal, ANY
from streamlit.util import repr_
from streamlit.watcher import util
from watchdog import events
from watchdog.observers import Observer
from streamlit.logger import get_logger
# Module-level logger, named after this module.
LOGGER = get_logger(__name__)
class EventBasedPathWatcher:
    """Watch one path on disk by delegating to the _MultiPathWatcher singleton."""

    @staticmethod
    def close_all() -> None:
        """Close the shared _MultiPathWatcher singleton and log that it happened."""
        _MultiPathWatcher.get_singleton().close()
        LOGGER.debug("Watcher closed")

    def __init__(
        self,
        path: str,
        on_changed: Callable[[str], None],
        *,  # keyword-only arguments:
        glob_pattern: Optional[str] = None,
        allow_nonexistent: bool = False,
    ) -> None:
        """Register ``path`` with the shared _MultiPathWatcher.

        Parameters
        ----------
        path : str
            The path to watch. It is stored in absolute form.
        on_changed : Callable[[str], None]
            Callback invoked when the path changes.
        glob_pattern : Optional[str]
            Glob pattern forwarded to the multi-path watcher; only relevant
            when the watched path is a directory.
        allow_nonexistent : bool
            Forwarded to the multi-path watcher. If True, a missing path is
            tolerated so that its later creation can be observed.
        """
        absolute_path = os.path.abspath(path)
        self._path = absolute_path
        self._on_changed = on_changed

        _MultiPathWatcher.get_singleton().watch_path(
            absolute_path,
            on_changed,
            glob_pattern=glob_pattern,
            allow_nonexistent=allow_nonexistent,
        )
        LOGGER.debug("Watcher created for %s", absolute_path)

    def __repr__(self) -> str:
        return repr_(self)

    def close(self) -> None:
        """Unregister this watcher's path/callback pair from the singleton."""
        _MultiPathWatcher.get_singleton().stop_watching_path(
            self._path, self._on_changed
        )
class _MultiPathWatcher(object):
"""Watches multiple paths."""
_singleton: Optional["_MultiPathWatcher"] = None
@classmethod
def get_singleton(cls) -> "_MultiPathWatcher":
"""Return the singleton _MultiPathWatcher object.
Instantiates one if necessary.
"""
if cls._singleton is None:
LOGGER.debug("No singleton. Registering one.")
_MultiPathWatcher()
return cast("_MultiPathWatcher", _MultiPathWatcher._singleton)
# Don't allow constructor to be called more than once.
def __new__(cls) -> "_MultiPathWatcher":
"""Constructor."""
if _MultiPathWatcher._singleton is not None:
raise RuntimeError("Use .get_singleton() instead")
return super(_MultiPathWatcher, cls).__new__(cls)
def __init__(self) -> None:
"""Constructor."""
_MultiPathWatcher._singleton = self
# Map of folder_to_watch -> _FolderEventHandler.
self._folder_handlers: Dict[str, _FolderEventHandler] = {}
# Used for mutation of _folder_handlers dict
self._lock = threading.Lock()
# The Observer object from the Watchdog module. Since this class is
# only instantiated once, we only have a single Observer in Streamlit,
# and it's in charge of watching all paths we're interested in.
self._observer = Observer()
self._observer.start() # Start observer thread.
def __repr__(self) -> str:
return repr_(self)
def watch_path(
self,
path: str,
callback: Callable[[str], None],
*, # keyword-only arguments:
glob_pattern: Optional[str] = None,
allow_nonexistent: bool = False,
) -> None:
"""Start watching a path."""
folder_path = os.path.abspath(os.path.dirname(path))
with self._lock:
folder_handler = self._folder_handlers.get(folder_path)
if folder_handler is None:
folder_handler = _FolderEventHandler()
self._folder_handlers[folder_path] = folder_handler
folder_handler.watch = self._observer.schedule(
folder_handler, folder_path, recursive=True
)
folder_handler.add_path_change_listener(
path,
callback,
glob_pattern=glob_pattern,
allow_nonexistent=allow_nonexistent,
)
def stop_watching_path(self, path: str, callback: Callable[[str], None]) -> None:
"""Stop watching a path."""
folder_path = os.path.abspath(os.path.dirname(path))
with self._lock:
folder_handler = self._folder_handlers.get(folder_path)
if folder_handler is None:
LOGGER.debug(
"Cannot stop watching path, because it is already not being "
"watched. %s",
folder_path,
)
return
folder_handler.remove_path_change_listener(path, callback)
if not folder_handler.is_watching_paths():
# Sometimes watchdog's FileSystemEventHandler does not have
# a .watch property. It's unclear why -- may be due to a
# race condition.
if hasattr(folder_handler, "watch"):
self._observer.unschedule(folder_handler.watch)
del self._folder_handlers[folder_path]
def close(self) -> None:
with self._lock:
"""Close this _MultiPathWatcher object forever."""
if len(self._folder_handlers) != 0:
self._folder_handlers = {}
LOGGER.debug(
"Stopping observer thread even though there is a non-zero "
"number of event observers!"
)
else:
LOGGER.debug("Stopping observer thread")
self._observer.stop()
self._observer.join(timeout=5)
class WatchedPath:
    """Bookkeeping record for one watched path, with a change signal.

    Holds the last known MD5 and modification time of the path, the options
    it was registered with, and a blinker ``Signal`` (``on_changed``) that
    listeners connect to.
    """

    def __init__(
        self,
        md5: str,
        modification_time: float,
        *,  # keyword-only arguments:
        glob_pattern: Optional[str] = None,
        allow_nonexistent: bool = False,
    ):
        # Last observed state of the path.
        self.md5 = md5
        self.modification_time = modification_time
        # Options the path was registered with.
        self.glob_pattern = glob_pattern
        self.allow_nonexistent = allow_nonexistent
        # Signal that listeners connect their callbacks to.
        self.on_changed = Signal()

    def __repr__(self) -> str:
        return repr_(self)
class _FolderEventHandler(events.FileSystemEventHandler):
    """Filter folder-level watchdog events down to the registered paths.

    Paths inside the folder are registered together with callbacks. When a
    created, modified or moved event arrives for a registered path whose
    modification time and MD5 both changed, the path's signal is fired.
    """

    def __init__(self) -> None:
        super().__init__()
        self._watched_paths: Dict[str, WatchedPath] = {}
        self._lock = threading.Lock()  # for watched_paths mutations

    def __repr__(self) -> str:
        return repr_(self)

    def add_path_change_listener(
        self,
        path: str,
        callback: Callable[[str], None],
        *,  # keyword-only arguments:
        glob_pattern: Optional[str] = None,
        allow_nonexistent: bool = False,
    ) -> None:
        """Register ``callback`` for ``path``, creating its record if needed."""
        with self._lock:
            entry = self._watched_paths.get(path)
            if entry is None:
                # First listener for this path: capture its current state.
                initial_md5 = util.calc_md5_with_blocking_retries(
                    path,
                    glob_pattern=glob_pattern,
                    allow_nonexistent=allow_nonexistent,
                )
                initial_mtime = util.path_modification_time(path, allow_nonexistent)
                entry = WatchedPath(
                    md5=initial_md5,
                    modification_time=initial_mtime,
                    glob_pattern=glob_pattern,
                    allow_nonexistent=allow_nonexistent,
                )
                self._watched_paths[path] = entry

            # weak=False: the signal keeps a strong reference to the callback.
            entry.on_changed.connect(callback, weak=False)

    def remove_path_change_listener(
        self, path: str, callback: Callable[[str], None]
    ) -> None:
        """Disconnect ``callback`` from ``path``; drop the path once unused."""
        with self._lock:
            entry = self._watched_paths.get(path)
            if entry is None:
                return

            entry.on_changed.disconnect(callback)
            if entry.on_changed.has_receivers_for(ANY):
                return
            del self._watched_paths[path]

    def is_watching_paths(self) -> bool:
        """Return true if this object has 1+ paths in its event filter."""
        return len(self._watched_paths) > 0

    def handle_path_change_event(self, event: events.FileSystemEvent) -> None:
        """Process a modification, creation or move event.

        The signal for the affected path is sent only when the path is
        registered and both its modification time and its MD5 differ from
        the stored values.
        """
        kind = event.event_type
        if kind == events.EVENT_TYPE_MOVED:
            # Many programs write to a backup file and then rename (i.e.
            # move) it, so the destination is the path that changed.
            LOGGER.debug("Move event: src %s; dest %s", event.src_path, event.dest_path)
            raw_path = event.dest_path
        elif kind in (events.EVENT_TYPE_MODIFIED, events.EVENT_TYPE_CREATED):
            # Creation is included because, on OSX with VI, saving deletes
            # the file, modifies the swap file and then re-creates the file.
            raw_path = event.src_path
        else:
            LOGGER.debug("Don't care about event type %s", event.event_type)
            return

        changed_path = os.path.abspath(raw_path)

        info = self._watched_paths.get(changed_path)
        if info is None:
            LOGGER.debug(
                "Ignoring changed path %s.\nWatched_paths: %s",
                changed_path,
                self._watched_paths,
            )
            return

        current_mtime = util.path_modification_time(
            changed_path, info.allow_nonexistent
        )
        if current_mtime == info.modification_time:
            LOGGER.debug("File/dir timestamp did not change: %s", changed_path)
            return
        info.modification_time = current_mtime

        current_md5 = util.calc_md5_with_blocking_retries(
            changed_path,
            glob_pattern=info.glob_pattern,
            allow_nonexistent=info.allow_nonexistent,
        )
        if current_md5 == info.md5:
            LOGGER.debug("File/dir MD5 did not change: %s", changed_path)
            return

        LOGGER.debug("File/dir MD5 changed: %s", changed_path)
        info.md5 = current_md5
        info.on_changed.send(changed_path)

    def on_created(self, event: events.FileSystemEvent) -> None:
        self.handle_path_change_event(event)

    def on_modified(self, event: events.FileSystemEvent) -> None:
        self.handle_path_change_event(event)

    def on_moved(self, event: events.FileSystemEvent) -> None:
        self.handle_path_change_event(event)

View File

@@ -0,0 +1,197 @@
# Copyright 2018-2022 Streamlit Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import collections
import types
from typing import Callable, Dict, List, Optional, Set
from streamlit import config
from streamlit import file_util
from streamlit.folder_black_list import FolderBlackList
from streamlit.logger import get_logger
from streamlit.session_data import SessionData
from streamlit.watcher.path_watcher import (
get_default_path_watcher_class,
NoOpPathWatcher,
)
# Module-level logger, named after this module.
LOGGER = get_logger(__name__)

# Pairs a path watcher instance with the name of the module it watches.
WatchedModule = collections.namedtuple("WatchedModule", ["watcher", "module_name"])

# This needs to be initialized lazily to avoid calling config.get_option() and
# thus initializing config options when this file is first imported.
# It is filled in by LocalSourcesWatcher._register_watcher on first use.
PathWatcher = None
class LocalSourcesWatcher:
    """Watch the main script and the source files of imported modules.

    A watcher is registered for the main script on construction. Calling
    ``update_watched_modules`` registers watchers for files of modules found
    in ``sys.modules`` that live in the script folder or on the PYTHONPATH and
    are not blacklisted. When any watched file changes, all watched modules
    are removed from ``sys.modules`` and the registered callbacks are invoked.
    """

    def __init__(self, session_data: "SessionData"):
        self._session_data = session_data
        self._on_file_changed: List[Callable[[], None]] = []
        self._is_closed = False
        # Names of the modules seen by the last update_watched_modules() run.
        self._cached_sys_modules: Set[str] = set()

        # Blacklist for folders that should not be watched
        self._folder_black_list = FolderBlackList(
            config.get_option("server.folderWatchBlacklist")
        )

        # Map of file path -> WatchedModule.
        self._watched_modules: Dict[str, WatchedModule] = {}

        self._register_watcher(
            self._session_data.main_script_path,
            module_name=None,  # Only the root script has None here.
        )

    def register_file_change_callback(self, cb: Callable[[], None]) -> None:
        """Add a zero-argument callback to run whenever a watched file changes."""
        self._on_file_changed.append(cb)

    def on_file_changed(self, filepath: str) -> None:
        """Unload all watched modules and notify callbacks about a change.

        Events for paths that are not being watched are logged and ignored.
        """
        if filepath not in self._watched_modules:
            LOGGER.error("Received event for non-watched file: %s", filepath)
            return

        # Workaround:
        # Delete all watched modules so we can guarantee changes to the
        # updated module are reflected on reload.
        #
        # In principle, for reloading a given module, we only need to unload
        # the module itself and all of the modules which import it (directly
        # or indirectly) such that when we exec the application code, the
        # changes are reloaded and reflected in the running application.
        #
        # However, determining all import paths for a given loaded module is
        # non-trivial, and so as a workaround we simply unload all watched
        # modules.
        for wm in self._watched_modules.values():
            if wm.module_name is not None and wm.module_name in sys.modules:
                del sys.modules[wm.module_name]

        for cb in self._on_file_changed:
            cb()

    def close(self) -> None:
        """Close every watcher and mark this object as closed."""
        for wm in self._watched_modules.values():
            wm.watcher.close()
        self._watched_modules = {}
        self._is_closed = True

    def _register_watcher(self, filepath: str, module_name: Optional[str]) -> None:
        """Create a watcher for ``filepath`` unless watching is disabled."""
        global PathWatcher
        if PathWatcher is None:
            # Resolved lazily so importing this module doesn't read config.
            PathWatcher = get_default_path_watcher_class()

        if PathWatcher is NoOpPathWatcher:
            return

        try:
            wm = WatchedModule(
                watcher=PathWatcher(filepath, self.on_file_changed),
                module_name=module_name,
            )
        except PermissionError:
            # If you don't have permission to read this file, don't even add it
            # to watchers.
            return

        self._watched_modules[filepath] = wm

    def _deregister_watcher(self, filepath: str) -> None:
        """Stop watching ``filepath``; the main script is never deregistered."""
        if filepath not in self._watched_modules:
            return

        if filepath == self._session_data.main_script_path:
            return

        wm = self._watched_modules[filepath]
        wm.watcher.close()
        del self._watched_modules[filepath]

    def _file_is_new(self, filepath: str) -> bool:
        """Return True if ``filepath`` has no watcher yet."""
        return filepath not in self._watched_modules

    def _file_should_be_watched(self, filepath: str) -> bool:
        """Return True for unwatched files in the script folder or PYTHONPATH."""
        # Using short circuiting for performance.
        return self._file_is_new(filepath) and (
            file_util.file_is_in_folder_glob(filepath, self._session_data.script_folder)
            or file_util.file_in_pythonpath(filepath)
        )

    def update_watched_modules(self) -> None:
        """Register watchers for modules added to ``sys.modules`` since last run.

        Does nothing once this object has been closed, or when the set of
        module names is unchanged since the previous call.
        """
        if self._is_closed:
            return

        # Take ONE snapshot and derive both the comparison set and the module
        # paths from it. Reading sys.modules several times lets a module that
        # is imported in between be cached as "seen" without being watched.
        modules_snapshot = dict(sys.modules)
        module_names = set(modules_snapshot)
        if module_names == self._cached_sys_modules:
            return

        modules_paths = {
            name: self._exclude_blacklisted_paths(get_module_paths(module))
            for name, module in modules_snapshot.items()
        }
        self._cached_sys_modules = module_names
        self._register_necessary_watchers(modules_paths)

    def _register_necessary_watchers(self, module_paths: Dict[str, Set[str]]) -> None:
        """Register a watcher for every path that should be watched."""
        for name, paths in module_paths.items():
            for path in paths:
                if self._file_should_be_watched(path):
                    self._register_watcher(path, name)

    def _exclude_blacklisted_paths(self, paths: Set[str]) -> Set[str]:
        """Return ``paths`` without the entries the folder blacklist rejects."""
        return {p for p in paths if not self._folder_black_list.is_blacklisted(p)}
def get_module_paths(module: types.ModuleType) -> Set[str]:
    """Return the absolute, existing paths associated with ``module``.

    Paths are collected from ``__file__``, ``__spec__.origin`` and the
    namespace-package ``__path__._path``. A missing attribute is skipped
    silently; any other error while reading one is logged as a warning.
    Only values accepted by ``_is_valid_path`` are returned.
    """
    paths_extractors = [
        # https://docs.python.org/3/reference/datamodel.html
        # __file__ is the pathname of the file from which the module was loaded
        # if it was loaded from a file.
        # The __file__ attribute may be missing for certain types of modules
        lambda m: [m.__file__],
        # https://docs.python.org/3/reference/import.html#__spec__
        # The __spec__ attribute is set to the module spec that was used
        # when importing the module. one exception is __main__,
        # where __spec__ is set to None in some cases.
        # https://www.python.org/dev/peps/pep-0451/#id16
        # "origin" in an import context means the system
        # (or resource within a system) from which a module originates
        # ... It is up to the loader to decide on how to interpret
        # and use a module's origin, if at all.
        lambda m: [m.__spec__.origin],
        # https://www.python.org/dev/peps/pep-0420/
        # Handling of "namespace packages" in which the __path__ attribute
        # is a _NamespacePath object with a _path attribute containing
        # the various paths of the package.
        lambda m: list(m.__path__._path),
    ]

    all_paths: Set[str] = set()
    for extract_paths in paths_extractors:
        potential_paths = []
        try:
            potential_paths = extract_paths(module)
        except AttributeError:
            # The module simply doesn't provide this attribute.
            pass
        except Exception as e:
            # Read the name defensively so the handler itself cannot fail on
            # an object without __name__, and let logging format lazily.
            module_name = getattr(module, "__name__", repr(module))
            LOGGER.warning(
                "Examining the path of %s raised: %s", module_name, e
            )

        # _is_valid_path only accepts str, so no str() conversion is needed.
        all_paths.update(
            os.path.abspath(p) for p in potential_paths if _is_valid_path(p)
        )
    return all_paths
def _is_valid_path(path: Optional[str]) -> bool:
return isinstance(path, str) and (os.path.isfile(path) or os.path.isdir(path))

View File

@@ -0,0 +1,184 @@
# Copyright 2018-2022 Streamlit Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Callable, Optional, Type, Union
import click
import streamlit.watcher
from streamlit import config
from streamlit import env_util
from streamlit.logger import get_logger
from streamlit.watcher.polling_path_watcher import PollingPathWatcher
# Module-level logger, named after this module.
LOGGER = get_logger(__name__)

# watchdog_available records whether the event-based watcher could be
# imported; get_path_watcher_class() consults it when picking a class.
try:
    # Check if the watchdog module is installed.
    from streamlit.watcher.event_based_path_watcher import EventBasedPathWatcher

    watchdog_available = True
except ImportError:
    watchdog_available = False
    # Stub the EventBasedPathWatcher so it can be mocked by tests

    class EventBasedPathWatcher:  # type: ignore
        pass
# local_sources_watcher.py caches the return value of
# get_default_path_watcher_class(), so it needs to differentiate between the
# cases where it:
#   1. has yet to call get_default_path_watcher_class()
#   2. has called get_default_path_watcher_class(), which returned that no
#      path watcher should be installed.
# This forces us to define this stub class since the cached value equaling
# None corresponds to case 1 above.
class NoOpPathWatcher:
    """Placeholder watcher class meaning "do not watch anything"."""

    def __init__(
        self,
        _path_str: str,
        _on_changed: Callable[[str], None],
        *,  # keyword-only arguments:
        glob_pattern: Optional[str] = None,
        allow_nonexistent: bool = False,
    ):
        """Accept the same arguments as the real watchers and ignore them."""
        return None
# Type alias covering every class that get_path_watcher_class() can return.
# EventBasedPathWatcher will be a stub and have no functional
# implementation if its import failed (due to missing watchdog module),
# so we can't reference it directly in this type.
PathWatcherType = Union[
    Type["streamlit.watcher.event_based_path_watcher.EventBasedPathWatcher"],
    Type[PollingPathWatcher],
    Type[NoOpPathWatcher],
]
def report_watchdog_availability():
    """Print an install hint when watchdog could not be imported.

    Nothing is printed when watchdog is available or when the
    ``global.disableWatchdogWarning`` config option is set.
    """
    if watchdog_available:
        return
    if config.get_option("global.disableWatchdogWarning"):
        return

    # On macOS an extra command is shown ahead of the pip command.
    if env_util.IS_DARWIN:
        msg = "\n $ xcode-select --install"
    else:
        msg = ""

    click.secho(
        " %s" % "For better performance, install the Watchdog module:",
        fg="blue",
        bold=True,
    )
    click.secho(
        """%s
$ pip install watchdog
"""
        % msg
    )
def _watch_path(
    path: str,
    on_path_changed: Callable[[str], None],
    watcher_type: Optional[str] = None,
    *,  # keyword-only arguments:
    glob_pattern: Optional[str] = None,
    allow_nonexistent: bool = False,
) -> bool:
    """Instantiate a path watcher for ``path`` when a usable class exists.

    Parameters
    ----------
    path
        Path to watch.
    on_path_changed
        Function that's called when the path changes.
    watcher_type
        Optional watcher_type string. If None, the
        'server.fileWatcherType' config option is used.
    glob_pattern
        Optional glob pattern, forwarded to the watcher class; used when
        watching a directory.
    allow_nonexistent
        If True, allow the file or directory at the given path to be
        nonexistent. Forwarded to the watcher class.

    Returns
    -------
    bool
        True if a watcher was created, or False if the selected class is
        NoOpPathWatcher.
    """
    effective_type = (
        config.get_option("server.fileWatcherType")
        if watcher_type is None
        else watcher_type
    )

    watcher_class = get_path_watcher_class(effective_type)
    if watcher_class is not NoOpPathWatcher:
        # The instance is deliberately not kept by this function.
        watcher_class(
            path,
            on_path_changed,
            glob_pattern=glob_pattern,
            allow_nonexistent=allow_nonexistent,
        )
        return True
    return False
def watch_file(
    path: str,
    on_file_changed: Callable[[str], None],
    watcher_type: Optional[str] = None,
) -> bool:
    """Watch a file via ``_watch_path`` with its default keyword options.

    Returns the result of ``_watch_path``.
    """
    is_watched = _watch_path(path, on_file_changed, watcher_type)
    return is_watched
def watch_dir(
    path: str,
    on_dir_changed: Callable[[str], None],
    watcher_type: Optional[str] = None,
    *,  # keyword-only arguments:
    glob_pattern: Optional[str] = None,
    allow_nonexistent: bool = False,
) -> bool:
    """Watch a directory via ``_watch_path``, forwarding every option.

    Returns the result of ``_watch_path``.
    """
    options = {
        "glob_pattern": glob_pattern,
        "allow_nonexistent": allow_nonexistent,
    }
    return _watch_path(path, on_dir_changed, watcher_type, **options)
def get_default_path_watcher_class() -> PathWatcherType:
    """Return the watcher class selected by the server.fileWatcherType
    config option.
    """
    configured_type = config.get_option("server.fileWatcherType")
    return get_path_watcher_class(configured_type)
def get_path_watcher_class(watcher_type: str) -> PathWatcherType:
    """Map a watcher_type string to a PathWatcher class.

    'auto' picks the event-based watcher when watchdog is available and the
    polling watcher otherwise; 'watchdog' picks the event-based watcher only
    when watchdog is available; 'poll' picks the polling watcher. Every other
    case yields NoOpPathWatcher.
    """
    if watcher_type == "poll":
        return PollingPathWatcher

    if watcher_type == "auto":
        return EventBasedPathWatcher if watchdog_available else PollingPathWatcher

    if watcher_type == "watchdog" and watchdog_available:
        return EventBasedPathWatcher

    return NoOpPathWatcher

View File

@@ -0,0 +1,123 @@
# Copyright 2018-2022 Streamlit Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A class that watches a given path via polling."""
from concurrent.futures import ThreadPoolExecutor
import os
import time
from typing import Callable, Optional
from streamlit.util import repr_
from streamlit.watcher import util
from streamlit.logger import get_logger
# Module-level logger, named after this module.
LOGGER = get_logger(__name__)

# Size of the thread pool shared by all PollingPathWatcher instances.
_MAX_WORKERS = 4
# Seconds each polling task sleeps before checking its path.
_POLLING_PERIOD_SECS = 0.2
class PollingPathWatcher:
    """Detect changes to a path by repeatedly checking its mtime and MD5."""

    # Thread pool shared by every instance of this class.
    _executor = ThreadPoolExecutor(max_workers=_MAX_WORKERS)

    @staticmethod
    def close_all() -> None:
        """Log that the watcher was closed.

        Nothing is actually shut down here; the method exists for interface
        parity with EventBasedPathWatcher.
        """
        LOGGER.debug("Watcher closed")

    def __init__(
        self,
        path: str,
        on_changed: Callable[[str], None],
        *,  # keyword-only arguments:
        glob_pattern: Optional[str] = None,
        allow_nonexistent: bool = False,
    ) -> None:
        """Record the path's current state and schedule the first poll.

        You do not need to retain a reference to a PollingPathWatcher to
        prevent it from being garbage collected. (The global _executor object
        retains references to all active instances.)
        """
        # TODO(vdonato): Modernize this by switching to pathlib.
        self._path = path
        self._on_changed = on_changed

        self._glob_pattern = glob_pattern
        self._allow_nonexistent = allow_nonexistent

        # Cleared by close() to stop the polling loop.
        self._active = True

        # Baseline state that later polls are compared against.
        self._modification_time = util.path_modification_time(
            self._path, self._allow_nonexistent
        )
        self._md5 = util.calc_md5_with_blocking_retries(
            self._path,
            glob_pattern=self._glob_pattern,
            allow_nonexistent=self._allow_nonexistent,
        )
        self._schedule()

    def __repr__(self) -> str:
        return repr_(self)

    def _schedule(self) -> None:
        """Submit one delayed check of the path to the shared executor."""

        def poll_after_delay():
            time.sleep(_POLLING_PERIOD_SECS)
            self._check_if_path_changed()

        PollingPathWatcher._executor.submit(poll_after_delay)

    def _check_if_path_changed(self) -> None:
        """Fire the callback if both mtime and MD5 changed, then reschedule."""
        if not self._active:
            # Don't call self._schedule()
            return

        latest_mtime = util.path_modification_time(
            self._path, self._allow_nonexistent
        )
        unchanged = latest_mtime <= self._modification_time
        if not unchanged:
            self._modification_time = latest_mtime

            latest_md5 = util.calc_md5_with_blocking_retries(
                self._path,
                glob_pattern=self._glob_pattern,
                allow_nonexistent=self._allow_nonexistent,
            )
            unchanged = latest_md5 == self._md5
            if not unchanged:
                self._md5 = latest_md5
                LOGGER.debug("Change detected: %s", self._path)
                self._on_changed(self._path)

        self._schedule()

    def close(self) -> None:
        """Stop watching the file system."""
        self._active = False

View File

@@ -0,0 +1,144 @@
# Copyright 2018-2022 Streamlit Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A bunch of useful utilities for the watcher.
These are functions that only make sense within the watcher. In particular,
functions that use streamlit.config can go here to avoid a dependency cycle.
"""
import hashlib
import time
import os
from pathlib import Path
from typing import Optional
# How many times to try to grab the MD5 hash.
# Also bounds the number of re-checks in _stable_dir_identifier.
_MAX_RETRIES = 5
# How long to wait between retries.
# Seconds, as passed to time.sleep().
_RETRY_WAIT_SECS = 0.1
def calc_md5_with_blocking_retries(
    path: str,
    *,  # keyword-only arguments:
    glob_pattern: Optional[str] = None,
    allow_nonexistent: bool = False,
) -> str:
    """Return the hex MD5 digest identifying the current state of ``path``.

    What gets hashed depends on the path:
    * missing path with ``allow_nonexistent`` set: the path string itself;
    * directory: the identifier from ``_stable_dir_identifier`` (using
      ``glob_pattern``, or "*" when none is given);
    * anything else: the bytes read from the file.

    IMPORTANT: The helpers used here call time.sleep(), which blocks
    execution. So you should only use this outside the main thread.
    """
    path_is_missing = allow_nonexistent and not os.path.exists(path)

    if path_is_missing:
        payload = path.encode("UTF-8")
    elif os.path.isdir(path):
        pattern = glob_pattern or "*"
        payload = _stable_dir_identifier(path, pattern).encode("UTF-8")
    else:
        payload = _get_file_content_with_blocking_retries(path)

    # Use hexdigest() instead of digest(), so it's easier to debug.
    return hashlib.md5(payload).hexdigest()
def path_modification_time(path: str, allow_nonexistent: bool = False) -> float:
    """Return the modification time of a path (file or directory).

    If allow_nonexistent is True and the path does not exist, we return 0.0 to
    guarantee that any file/dir later created at the path has a later
    modification time than the last time returned by this function for that
    path.

    If allow_nonexistent is False and no file/dir exists at the path, a
    FileNotFoundError is raised (by os.stat).

    For any path that does correspond to an existing file/dir, we return its
    modification time.
    """
    # A single os.stat call replaces the earlier "exists, then stat" pair,
    # which could raise if the path vanished between the two calls even
    # though the caller asked for missing paths to be tolerated.
    try:
        return os.stat(path).st_mtime
    except (OSError, ValueError):
        # These are the errors os.path.exists() reports as "does not exist".
        if allow_nonexistent:
            return 0.0
        raise
def _get_file_content_with_blocking_retries(file_path: str) -> bytes:
    """Read and return the bytes of ``file_path``, retrying if it is missing.

    Up to _MAX_RETRIES attempts are made, sleeping _RETRY_WAIT_SECS after each
    failed one; the FileNotFoundError from the last attempt is re-raised.
    """
    # There's a race condition where sometimes file_path no longer exists when
    # we try to read it (since the file is in the process of being written).
    # So here we retry a few times using this loop. See issue #186.
    last_attempt = _MAX_RETRIES - 1
    for attempt in range(_MAX_RETRIES):
        try:
            with open(file_path, "rb") as stream:
                return stream.read()
        except FileNotFoundError:
            if attempt >= last_attempt:
                raise
        # Only reached after a failed attempt that will be retried.
        time.sleep(_RETRY_WAIT_SECS)
    return b""
def _dirfiles(dir_path: str, glob_pattern: str) -> str:
p = Path(dir_path)
filenames = sorted(
[f.name for f in p.glob(glob_pattern) if not f.name.startswith(".")]
)
return "+".join(filenames)
def _stable_dir_identifier(dir_path: str, glob_pattern: str) -> str:
    """Return an id for a directory once its file listing stops changing.

    The listing from ``_dirfiles`` is re-read after each _RETRY_WAIT_SECS
    pause, at most _MAX_RETRIES times, until two consecutive reads agree. The
    result is ``dir_path`` and the latest listing joined by "+".

    We wait because many tools (e.g. git) and editors (e.g. vim) "edit" files
    (from the user's perspective) through some combination of deleting,
    creating, and moving files under the hood, so individual FileSystemEvents
    from watchdog cannot tell us when a file was really added or removed.

    This is a bit of an unfortunate situation, but the approach is most
    likely fine as:
      * The worst outcome is a false positive page added/removed
        notification, which isn't too disastrous and can just be ignored.
      * It is impossible (that is, I'm fairly certain that the problem is
        undecidable) to know whether a file created/deleted/moved event
        corresponds to a legitimate file creation/deletion/move or is part of
        some sequence of events that results in what the user sees as a file
        "edit".
    """
    listing = _dirfiles(dir_path, glob_pattern)

    for _ in range(_MAX_RETRIES):
        time.sleep(_RETRY_WAIT_SECS)

        previous, listing = listing, _dirfiles(dir_path, glob_pattern)
        if previous == listing:
            break

    return f"{dir_path}+{listing}"