summaryrefslogtreecommitdiffstats
path: root/venv/lib/python3.9/site-packages/smmap/mman.py
diff options
context:
space:
mode:
authornoptuno <repollo.marrero@gmail.com>2023-04-28 02:29:30 +0200
committernoptuno <repollo.marrero@gmail.com>2023-04-28 02:29:30 +0200
commit355dee533bb34a571b9367820a63cccb668cf866 (patch)
tree838af886b4fec07320aeb10f0d1e74ba79e79b5c /venv/lib/python3.9/site-packages/smmap/mman.py
parentadded pyproject.toml file (diff)
downloadgpt4free-355dee533bb34a571b9367820a63cccb668cf866.tar
gpt4free-355dee533bb34a571b9367820a63cccb668cf866.tar.gz
gpt4free-355dee533bb34a571b9367820a63cccb668cf866.tar.bz2
gpt4free-355dee533bb34a571b9367820a63cccb668cf866.tar.lz
gpt4free-355dee533bb34a571b9367820a63cccb668cf866.tar.xz
gpt4free-355dee533bb34a571b9367820a63cccb668cf866.tar.zst
gpt4free-355dee533bb34a571b9367820a63cccb668cf866.zip
Diffstat (limited to 'venv/lib/python3.9/site-packages/smmap/mman.py')
-rw-r--r--venv/lib/python3.9/site-packages/smmap/mman.py588
1 files changed, 588 insertions, 0 deletions
diff --git a/venv/lib/python3.9/site-packages/smmap/mman.py b/venv/lib/python3.9/site-packages/smmap/mman.py
new file mode 100644
index 00000000..1de7d9e9
--- /dev/null
+++ b/venv/lib/python3.9/site-packages/smmap/mman.py
@@ -0,0 +1,588 @@
+"""Module containing a memory memory manager which provides a sliding window on a number of memory mapped files"""
+from .util import (
+ MapWindow,
+ MapRegion,
+ MapRegionList,
+ is_64_bit,
+)
+
+import sys
+from functools import reduce
+
+__all__ = ["StaticWindowMapManager", "SlidingWindowMapManager", "WindowCursor"]
+#{ Utilities
+
+#}END utilities
+
+
+class WindowCursor:
+
+ """
+ Pointer into the mapped region of the memory manager, keeping the map
+ alive until it is destroyed and no other client uses it.
+
+ Cursors should not be created manually, but are instead returned by the SlidingWindowMapManager
+
+ **Note:**: The current implementation is suited for static and sliding window managers, but it also means
+ that it must be suited for the somewhat quite different sliding manager. It could be improved, but
+ I see no real need to do so."""
+ __slots__ = (
+ '_manager', # the manger keeping all file regions
+ '_rlist', # a regions list with regions for our file
+ '_region', # our current class:`MapRegion` or None
+ '_ofs', # relative offset from the actually mapped area to our start area
+ '_size' # maximum size we should provide
+ )
+
+ def __init__(self, manager=None, regions=None):
+ self._manager = manager
+ self._rlist = regions
+ self._region = None
+ self._ofs = 0
+ self._size = 0
+
+ def __del__(self):
+ self._destroy()
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self._destroy()
+
+ def _destroy(self):
+ """Destruction code to decrement counters"""
+ self.unuse_region()
+
+ if self._rlist is not None:
+ # Actual client count, which doesn't include the reference kept by the manager, nor ours
+ # as we are about to be deleted
+ try:
+ if len(self._rlist) == 0:
+ # Free all resources associated with the mapped file
+ self._manager._fdict.pop(self._rlist.path_or_fd())
+ # END remove regions list from manager
+ except (TypeError, KeyError):
+ # sometimes, during shutdown, getrefcount is None. Its possible
+ # to re-import it, however, its probably better to just ignore
+ # this python problem (for now).
+ # The next step is to get rid of the error prone getrefcount alltogether.
+ pass
+ # END exception handling
+ # END handle regions
+
+ def _copy_from(self, rhs):
+ """Copy all data from rhs into this instance, handles usage count"""
+ self._manager = rhs._manager
+ self._rlist = type(rhs._rlist)(rhs._rlist)
+ self._region = rhs._region
+ self._ofs = rhs._ofs
+ self._size = rhs._size
+
+ for region in self._rlist:
+ region.increment_client_count()
+
+ if self._region is not None:
+ self._region.increment_client_count()
+ # END handle regions
+
+ def __copy__(self):
+ """copy module interface"""
+ cpy = type(self)()
+ cpy._copy_from(self)
+ return cpy
+
+ #{ Interface
+ def assign(self, rhs):
+ """Assign rhs to this instance. This is required in order to get a real copy.
+ Alternativly, you can copy an existing instance using the copy module"""
+ self._destroy()
+ self._copy_from(rhs)
+
+ def use_region(self, offset=0, size=0, flags=0):
+ """Assure we point to a window which allows access to the given offset into the file
+
+ :param offset: absolute offset in bytes into the file
+ :param size: amount of bytes to map. If 0, all available bytes will be mapped
+ :param flags: additional flags to be given to os.open in case a file handle is initially opened
+ for mapping. Has no effect if a region can actually be reused.
+ :return: this instance - it should be queried for whether it points to a valid memory region.
+ This is not the case if the mapping failed because we reached the end of the file
+
+ **Note:**: The size actually mapped may be smaller than the given size. If that is the case,
+ either the file has reached its end, or the map was created between two existing regions"""
+ need_region = True
+ man = self._manager
+ fsize = self._rlist.file_size()
+ size = min(size or fsize, man.window_size() or fsize) # clamp size to window size
+
+ if self._region is not None:
+ if self._region.includes_ofs(offset):
+ need_region = False
+ else:
+ self.unuse_region()
+ # END handle existing region
+ # END check existing region
+
+ # offset too large ?
+ if offset >= fsize:
+ return self
+ # END handle offset
+
+ if need_region:
+ self._region = man._obtain_region(self._rlist, offset, size, flags, False)
+ self._region.increment_client_count()
+ # END need region handling
+
+ self._ofs = offset - self._region._b
+ self._size = min(size, self._region.ofs_end() - offset)
+
+ return self
+
+ def unuse_region(self):
+ """Unuse the current region. Does nothing if we have no current region
+
+ **Note:** the cursor unuses the region automatically upon destruction. It is recommended
+ to un-use the region once you are done reading from it in persistent cursors as it
+ helps to free up resource more quickly"""
+ if self._region is not None:
+ self._region.increment_client_count(-1)
+ self._region = None
+ # note: should reset ofs and size, but we spare that for performance. Its not
+ # allowed to query information if we are not valid !
+
+ def buffer(self):
+ """Return a buffer object which allows access to our memory region from our offset
+ to the window size. Please note that it might be smaller than you requested when calling use_region()
+
+ **Note:** You can only obtain a buffer if this instance is_valid() !
+
+ **Note:** buffers should not be cached passed the duration of your access as it will
+ prevent resources from being freed even though they might not be accounted for anymore !"""
+ return memoryview(self._region.buffer())[self._ofs:self._ofs+self._size]
+
+ def map(self):
+ """
+ :return: the underlying raw memory map. Please not that the offset and size is likely to be different
+ to what you set as offset and size. Use it only if you are sure about the region it maps, which is the whole
+ file in case of StaticWindowMapManager"""
+ return self._region.map()
+
+ def is_valid(self):
+ """:return: True if we have a valid and usable region"""
+ return self._region is not None
+
+ def is_associated(self):
+ """:return: True if we are associated with a specific file already"""
+ return self._rlist is not None
+
+ def ofs_begin(self):
+ """:return: offset to the first byte pointed to by our cursor
+
+ **Note:** only if is_valid() is True"""
+ return self._region._b + self._ofs
+
+ def ofs_end(self):
+ """:return: offset to one past the last available byte"""
+ # unroll method calls for performance !
+ return self._region._b + self._ofs + self._size
+
+ def size(self):
+ """:return: amount of bytes we point to"""
+ return self._size
+
+ def region(self):
+ """:return: our mapped region, or None if nothing is mapped yet
+ :raise AssertionError: if we have no current region. This is only useful for debugging"""
+ return self._region
+
+ def includes_ofs(self, ofs):
+ """:return: True if the given absolute offset is contained in the cursors
+ current region
+
+ **Note:** cursor must be valid for this to work"""
+ # unroll methods
+ return (self._region._b + self._ofs) <= ofs < (self._region._b + self._ofs + self._size)
+
+ def file_size(self):
+ """:return: size of the underlying file"""
+ return self._rlist.file_size()
+
+ def path_or_fd(self):
+ """:return: path or file descriptor of the underlying mapped file"""
+ return self._rlist.path_or_fd()
+
+ def path(self):
+ """:return: path of the underlying mapped file
+ :raise ValueError: if attached path is not a path"""
+ if isinstance(self._rlist.path_or_fd(), int):
+ raise ValueError("Path queried although mapping was applied to a file descriptor")
+ # END handle type
+ return self._rlist.path_or_fd()
+
+ def fd(self):
+ """:return: file descriptor used to create the underlying mapping.
+
+ **Note:** it is not required to be valid anymore
+ :raise ValueError: if the mapping was not created by a file descriptor"""
+ if isinstance(self._rlist.path_or_fd(), str):
+ raise ValueError("File descriptor queried although mapping was generated from path")
+ # END handle type
+ return self._rlist.path_or_fd()
+
+ #} END interface
+
+
+class StaticWindowMapManager:
+
+ """Provides a manager which will produce single size cursors that are allowed
+ to always map the whole file.
+
+ Clients must be written to specifically know that they are accessing their data
+ through a StaticWindowMapManager, as they otherwise have to deal with their window size.
+
+ These clients would have to use a SlidingWindowMapBuffer to hide this fact.
+
+ This type will always use a maximum window size, and optimize certain methods to
+ accommodate this fact"""
+
+ __slots__ = [
+ '_fdict', # mapping of path -> StorageHelper (of some kind
+ '_window_size', # maximum size of a window
+ '_max_memory_size', # maximum amount of memory we may allocate
+ '_max_handle_count', # maximum amount of handles to keep open
+ '_memory_size', # currently allocated memory size
+ '_handle_count', # amount of currently allocated file handles
+ ]
+
+ #{ Configuration
+ MapRegionListCls = MapRegionList
+ MapWindowCls = MapWindow
+ MapRegionCls = MapRegion
+ WindowCursorCls = WindowCursor
+ #} END configuration
+
+ _MB_in_bytes = 1024 * 1024
+
+ def __init__(self, window_size=0, max_memory_size=0, max_open_handles=sys.maxsize):
+ """initialize the manager with the given parameters.
+ :param window_size: if -1, a default window size will be chosen depending on
+ the operating system's architecture. It will internally be quantified to a multiple of the page size
+ If 0, the window may have any size, which basically results in mapping the whole file at one
+ :param max_memory_size: maximum amount of memory we may map at once before releasing mapped regions.
+ If 0, a viable default will be set depending on the system's architecture.
+ It is a soft limit that is tried to be kept, but nothing bad happens if we have to over-allocate
+ :param max_open_handles: if not maxint, limit the amount of open file handles to the given number.
+ Otherwise the amount is only limited by the system itself. If a system or soft limit is hit,
+ the manager will free as many handles as possible"""
+ self._fdict = dict()
+ self._window_size = window_size
+ self._max_memory_size = max_memory_size
+ self._max_handle_count = max_open_handles
+ self._memory_size = 0
+ self._handle_count = 0
+
+ if window_size < 0:
+ coeff = 64
+ if is_64_bit():
+ coeff = 1024
+ # END handle arch
+ self._window_size = coeff * self._MB_in_bytes
+ # END handle max window size
+
+ if max_memory_size == 0:
+ coeff = 1024
+ if is_64_bit():
+ coeff = 8192
+ # END handle arch
+ self._max_memory_size = coeff * self._MB_in_bytes
+ # END handle max memory size
+
+ #{ Internal Methods
+
+ def _collect_lru_region(self, size):
+ """Unmap the region which was least-recently used and has no client
+ :param size: size of the region we want to map next (assuming its not already mapped partially or full
+ if 0, we try to free any available region
+ :return: Amount of freed regions
+
+ .. Note::
+ We don't raise exceptions anymore, in order to keep the system working, allowing temporary overallocation.
+ If the system runs out of memory, it will tell.
+
+ .. TODO::
+ implement a case where all unusued regions are discarded efficiently.
+ Currently its only brute force
+ """
+ num_found = 0
+ while (size == 0) or (self._memory_size + size > self._max_memory_size):
+ lru_region = None
+ lru_list = None
+ for regions in self._fdict.values():
+ for region in regions:
+ # check client count - if it's 1, it's just us
+ if (region.client_count() == 1 and
+ (lru_region is None or region._uc < lru_region._uc)):
+ lru_region = region
+ lru_list = regions
+ # END update lru_region
+ # END for each region
+ # END for each regions list
+
+ if lru_region is None:
+ break
+ # END handle region not found
+
+ num_found += 1
+ del(lru_list[lru_list.index(lru_region)])
+ lru_region.increment_client_count(-1)
+ self._memory_size -= lru_region.size()
+ self._handle_count -= 1
+ # END while there is more memory to free
+ return num_found
+
+ def _obtain_region(self, a, offset, size, flags, is_recursive):
+ """Utilty to create a new region - for more information on the parameters,
+ see MapCursor.use_region.
+ :param a: A regions (a)rray
+ :return: The newly created region"""
+ if self._memory_size + size > self._max_memory_size:
+ self._collect_lru_region(size)
+ # END handle collection
+
+ r = None
+ if a:
+ assert len(a) == 1
+ r = a[0]
+ else:
+ try:
+ r = self.MapRegionCls(a.path_or_fd(), 0, sys.maxsize, flags)
+ except Exception:
+ # apparently we are out of system resources or hit a limit
+ # As many more operations are likely to fail in that condition (
+ # like reading a file from disk, etc) we free up as much as possible
+ # As this invalidates our insert position, we have to recurse here
+ if is_recursive:
+ # we already tried this, and still have no success in obtaining
+ # a mapping. This is an exception, so we propagate it
+ raise
+ # END handle existing recursion
+ self._collect_lru_region(0)
+ return self._obtain_region(a, offset, size, flags, True)
+ # END handle exceptions
+
+ self._handle_count += 1
+ self._memory_size += r.size()
+ a.append(r)
+ # END handle array
+
+ assert r.includes_ofs(offset)
+ return r
+
+ #}END internal methods
+
+ #{ Interface
+ def make_cursor(self, path_or_fd):
+ """
+ :return: a cursor pointing to the given path or file descriptor.
+ It can be used to map new regions of the file into memory
+
+ **Note:** if a file descriptor is given, it is assumed to be open and valid,
+ but may be closed afterwards. To refer to the same file, you may reuse
+ your existing file descriptor, but keep in mind that new windows can only
+ be mapped as long as it stays valid. This is why the using actual file paths
+ are preferred unless you plan to keep the file descriptor open.
+
+ **Note:** file descriptors are problematic as they are not necessarily unique, as two
+ different files opened and closed in succession might have the same file descriptor id.
+
+ **Note:** Using file descriptors directly is faster once new windows are mapped as it
+ prevents the file to be opened again just for the purpose of mapping it."""
+ regions = self._fdict.get(path_or_fd)
+ if regions is None:
+ regions = self.MapRegionListCls(path_or_fd)
+ self._fdict[path_or_fd] = regions
+ # END obtain region for path
+ return self.WindowCursorCls(self, regions)
+
+ def collect(self):
+ """Collect all available free-to-collect mapped regions
+ :return: Amount of freed handles"""
+ return self._collect_lru_region(0)
+
+ def num_file_handles(self):
+ """:return: amount of file handles in use. Each mapped region uses one file handle"""
+ return self._handle_count
+
+ def num_open_files(self):
+ """Amount of opened files in the system"""
+ return reduce(lambda x, y: x + y, (1 for rlist in self._fdict.values() if len(rlist) > 0), 0)
+
+ def window_size(self):
+ """:return: size of each window when allocating new regions"""
+ return self._window_size
+
+ def mapped_memory_size(self):
+ """:return: amount of bytes currently mapped in total"""
+ return self._memory_size
+
+ def max_file_handles(self):
+ """:return: maximium amount of handles we may have opened"""
+ return self._max_handle_count
+
+ def max_mapped_memory_size(self):
+ """:return: maximum amount of memory we may allocate"""
+ return self._max_memory_size
+
+ #} END interface
+
+ #{ Special Purpose Interface
+
+ def force_map_handle_removal_win(self, base_path):
+ """ONLY AVAILABLE ON WINDOWS
+ On windows removing files is not allowed if anybody still has it opened.
+ If this process is ourselves, and if the whole process uses this memory
+ manager (as far as the parent framework is concerned) we can enforce
+ closing all memory maps whose path matches the given base path to
+ allow the respective operation after all.
+ The respective system must NOT access the closed memory regions anymore !
+ This really may only be used if you know that the items which keep
+ the cursors alive will not be using it anymore. They need to be recreated !
+ :return: Amount of closed handles
+
+ **Note:** does nothing on non-windows platforms"""
+ if sys.platform != 'win32':
+ return
+ # END early bailout
+
+ num_closed = 0
+ for path, rlist in self._fdict.items():
+ if path.startswith(base_path):
+ for region in rlist:
+ region.release()
+ num_closed += 1
+ # END path matches
+ # END for each path
+ return num_closed
+ #} END special purpose interface
+
+
+class SlidingWindowMapManager(StaticWindowMapManager):
+
+ """Maintains a list of ranges of mapped memory regions in one or more files and allows to easily
+ obtain additional regions assuring there is no overlap.
+ Once a certain memory limit is reached globally, or if there cannot be more open file handles
+ which result from each mmap call, the least recently used, and currently unused mapped regions
+ are unloaded automatically.
+
+ **Note:** currently not thread-safe !
+
+ **Note:** in the current implementation, we will automatically unload windows if we either cannot
+ create more memory maps (as the open file handles limit is hit) or if we have allocated more than
+ a safe amount of memory already, which would possibly cause memory allocations to fail as our address
+ space is full."""
+
+ __slots__ = tuple()
+
+ def __init__(self, window_size=-1, max_memory_size=0, max_open_handles=sys.maxsize):
+ """Adjusts the default window size to -1"""
+ super().__init__(window_size, max_memory_size, max_open_handles)
+
+ def _obtain_region(self, a, offset, size, flags, is_recursive):
+ # bisect to find an existing region. The c++ implementation cannot
+ # do that as it uses a linked list for regions.
+ r = None
+ lo = 0
+ hi = len(a)
+ while lo < hi:
+ mid = (lo + hi) // 2
+ ofs = a[mid]._b
+ if ofs <= offset:
+ if a[mid].includes_ofs(offset):
+ r = a[mid]
+ break
+ # END have region
+ lo = mid + 1
+ else:
+ hi = mid
+ # END handle position
+ # END while bisecting
+
+ if r is None:
+ window_size = self._window_size
+ left = self.MapWindowCls(0, 0)
+ mid = self.MapWindowCls(offset, size)
+ right = self.MapWindowCls(a.file_size(), 0)
+
+ # we want to honor the max memory size, and assure we have anough
+ # memory available
+ # Save calls !
+ if self._memory_size + window_size > self._max_memory_size:
+ self._collect_lru_region(window_size)
+ # END handle collection
+
+ # we assume the list remains sorted by offset
+ insert_pos = 0
+ len_regions = len(a)
+ if len_regions == 1:
+ if a[0]._b <= offset:
+ insert_pos = 1
+ # END maintain sort
+ else:
+ # find insert position
+ insert_pos = len_regions
+ for i, region in enumerate(a):
+ if region._b > offset:
+ insert_pos = i
+ break
+ # END if insert position is correct
+ # END for each region
+ # END obtain insert pos
+
+ # adjust the actual offset and size values to create the largest
+ # possible mapping
+ if insert_pos == 0:
+ if len_regions:
+ right = self.MapWindowCls.from_region(a[insert_pos])
+ # END adjust right side
+ else:
+ if insert_pos != len_regions:
+ right = self.MapWindowCls.from_region(a[insert_pos])
+ # END adjust right window
+ left = self.MapWindowCls.from_region(a[insert_pos - 1])
+ # END adjust surrounding windows
+
+ mid.extend_left_to(left, window_size)
+ mid.extend_right_to(right, window_size)
+ mid.align()
+
+ # it can happen that we align beyond the end of the file
+ if mid.ofs_end() > right.ofs:
+ mid.size = right.ofs - mid.ofs
+ # END readjust size
+
+ # insert new region at the right offset to keep the order
+ try:
+ if self._handle_count >= self._max_handle_count:
+ raise Exception
+ # END assert own imposed max file handles
+ r = self.MapRegionCls(a.path_or_fd(), mid.ofs, mid.size, flags)
+ except Exception:
+ # apparently we are out of system resources or hit a limit
+ # As many more operations are likely to fail in that condition (
+ # like reading a file from disk, etc) we free up as much as possible
+ # As this invalidates our insert position, we have to recurse here
+ if is_recursive:
+ # we already tried this, and still have no success in obtaining
+ # a mapping. This is an exception, so we propagate it
+ raise
+ # END handle existing recursion
+ self._collect_lru_region(0)
+ return self._obtain_region(a, offset, size, flags, True)
+ # END handle exceptions
+
+ self._handle_count += 1
+ self._memory_size += r.size()
+ a.insert(insert_pos, r)
+ # END create new region
+ return r