"""
filesequence - A parsing object representing sequential files for fileseq.
"""
from __future__ import annotations
import collections.abc
import dataclasses
import decimal
import fnmatch
import functools
import operator
import os
import re
import sys
import typing
from glob import iglob
from . import constants, utils
from .constants import (
PAD_STYLE_DEFAULT, PAD_MAP, REVERSE_PAD_MAP,
DISK_RE, DISK_SUB_RE, SPLIT_RE, SPLIT_SUB_RE,
PRINTF_SYNTAX_PADDING_RE, HOUDINI_SYNTAX_PADDING_RE,
UDIM_PADDING_PATTERNS)
from .exceptions import ParseException, FileSeqException
from .frameset import FrameSet
[docs]
class FileSequence:
""":class:`FileSequence` represents an ordered sequence of files.
Args:
sequence (str): (ie: dir/path.1-100#.ext)
Returns:
:class:`FileSequence`:
Raises:
:class:`fileseq.exceptions.MaxSizeException`: If frame size exceeds
``fileseq.constants.MAX_FRAME_SIZE``
"""
# Instance attributes populated by __init__ (or pre-seeded by
# yield_sequences_in_list before it re-runs __init__).
_base: str  # value returned by basename()
_decimal_places: int  # subframe width derived from _subframe_pad
_dir: str  # value returned by dirname(); setDirname() appends a path separator
_ext: str  # value returned by extension()
_frameSet: FrameSet|None  # None when the path carries no frame range
_frame_pad: str  # padding characters of the whole-frame component
_pad: str  # full padding: _frame_pad, optionally followed by '.' + _subframe_pad
_subframe_pad: str  # padding characters of the subframe component ('' if none)
# Module-level constants re-bound as class attributes; the methods reach
# them through ``self.``/``cls.`` (e.g. self.SPLIT_RE, cls.DISK_RE).
DISK_RE = DISK_RE
DISK_SUB_RE = DISK_SUB_RE
PAD_MAP = PAD_MAP
REVERSE_PAD_MAP = REVERSE_PAD_MAP
SPLIT_RE = SPLIT_RE
SPLIT_SUB_RE = SPLIT_SUB_RE
# Padding applied by setFrameSet()/setFrameRange() when none is set yet.
_DEFAULT_PAD_CHAR = '@'
@dataclasses.dataclass
class _Components:
    # Mutable bundle of the pieces that make up a sequence string.
    # Built by ``__components`` and consumed by ``__str__`` and ``__eq__``.
    dir: str  # directory portion (``_dir``)
    base: str  # basename portion (``_base``)
    # FrameSet, or "" when the sequence has none; ``__str__`` overwrites
    # this field with the string form before joining.
    frameSet: FrameSet|str|None
    # Padding characters ("" without a frame set); ``__eq__`` replaces
    # them with the numeric padding width before comparing.
    pad: str|int
    ext: str  # extension portion (``_ext``)
def __init__(self,
             sequence: str,
             pad_style: constants._PadStyle = PAD_STYLE_DEFAULT,
             allow_subframes: bool = False):
    """Parse ``sequence`` into directory, basename, frame set, padding
    and extension.

    Parsing is skipped when the instance already has a ``_frameSet``
    attribute; :meth:`yield_sequences_in_list` relies on this by filling
    the fields on an instance made with ``__new__`` and then calling
    ``__init__`` only to derive the numeric padding values.

    Args:
        sequence (str): sequence string (ie: dir/path.1-100#.ext)
        pad_style: padding style used to convert between padding
            characters and numeric widths
        allow_subframes (bool): if True, parse with the subframe-aware
            patterns (``SPLIT_SUB_RE`` / ``DISK_SUB_RE``)

    Raises:
        :class:`fileseq.exceptions.ParseException`: if the main pattern
            fails to split the string but it contains one of the
            ``PAD_MAP`` placeholders
    """
    sequence = utils.asString(sequence)

    # Only parse when the fields were not pre-populated by a caller
    if not hasattr(self, '_frameSet'):

        self._frameSet = None

        if allow_subframes:
            split_re = self.SPLIT_SUB_RE
            disk_re = self.DISK_SUB_RE
        else:
            split_re = self.SPLIT_RE
            disk_re = self.DISK_RE

        try:
            # the main case, padding characters in the path.1-100#.exr
            # NOTE(review): the ValueError handled below is presumably the
            # 4-way unpack failing when split_re does not match - confirm
            path, frames, self._pad, self._ext = split_re.split(sequence, 1)
            self._frame_pad, _, self._subframe_pad = self._pad.partition('.')
            self._dir, self._base = os.path.split(path)
            self._frameSet = FrameSet(frames)
        except ValueError:
            # edge case 1; we've got an invalid pad
            for placeholder in self.PAD_MAP:
                if placeholder in sequence:
                    msg = "Failed to parse FileSequence: {!r}"
                    raise ParseException(msg.format(sequence))

            # edge case 2; we've got a single frame of a sequence
            a_frame = disk_re.match(sequence)
            if a_frame:
                self._dir, self._base, frames, self._ext = a_frame.groups()
                # edge case 3: we've got a single versioned file, not a sequence
                # (digits not preceded by '.' are folded back into the basename)
                if frames and not self._base.endswith('.'):
                    self._base = self._base + frames
                    self._pad = ''
                    self._frame_pad = ''
                    self._subframe_pad = ''
                elif not frames:
                    self._pad = ''
                    self._frame_pad = ''
                    self._subframe_pad = ''
                    self._frameSet = None
                else:
                    self._frameSet = FrameSet(frames)
                    if self._frameSet:
                        # Padding is inferred from the digit count of the
                        # frame (and subframe) as written in the path
                        frame_num, _, subframe_num = frames.partition('.')
                        self._frame_pad = self.getPaddingChars(len(frame_num), pad_style=pad_style)
                        if subframe_num:
                            self._subframe_pad = self.getPaddingChars(len(subframe_num), pad_style=pad_style)
                            self._pad = '.'.join([self._frame_pad, self._subframe_pad])
                        else:
                            self._pad = self._frame_pad
                            self._subframe_pad = ''
                    else:
                        self._pad = ''
                        self._frame_pad = ''
                        self._subframe_pad = ''
                        self._frameSet = None
            # edge case 4; we've got a solitary file, not a sequence
            else:
                path, self._ext = os.path.splitext(sequence)
                self._dir, self._base = os.path.split(path)
                self._pad = ''
                self._frame_pad = ''
                self._subframe_pad = ''

    # Normalise the directory so that it ends with a path separator
    if self._dir:
        self.setDirname(self._dir)

    self._pad_style = pad_style
    self._zfill = self.getPaddingNum(self._frame_pad, pad_style=pad_style)
    self._decimal_places = self.getPaddingNum(self._subframe_pad, pad_style=pad_style)

    # Round subframes to match sequence
    if self._frameSet is not None and self._frameSet.hasSubFrames():
        self._frameSet = FrameSet([
            utils.quantize(frame, self._decimal_places)
            for frame in self._frameSet
        ])
[docs]
def copy(self) -> FileSequence:
"""
Create a deep copy of this sequence
Returns:
:class:`.FileSequence`:
"""
fs = self.__class__.__new__(self.__class__)
fs.__dict__ = self.__dict__.copy()
fs._frameSet = None
if self._frameSet is not None:
fs._frameSet = self._frameSet.copy()
return fs
def _format(self, template: str) -> str:
# Potentially expensive if inverted range is large
# and user never asked for it in template
inverted = (self.invertedFrameRange() or "") if "{inverted}" in template else ""
return template.format(
basename=self.basename(),
extension=self.extension(), start=self.start(),
end=self.end(), length=len(self),
padding=self.padding(),
range=self.frameRange() or "",
inverted=inverted,
dirname=self.dirname())
[docs]
def split(self) -> list[FileSequence]:
    """
    Split the :class:`FileSequence` into contiguous pieces and return them
    as a list of :class:`FileSequence` instances.

    Each piece is re-parsed from its string form using this sequence's own
    padding style, and with subframe parsing enabled when this sequence
    has subframe padding, so that the pieces keep the same padding width
    as the parent.

    Returns:
        list[:class:`FileSequence`]:
    """
    klass = self.__class__
    # Without these the pieces would be parsed with the default pad style
    # and without subframe support, changing the meaning of the padding.
    pad_style = self._pad_style
    allow_subframes = bool(self._decimal_places)
    return [
        klass(
            ''.join((self._dir, self._base, frange, self._pad, self._ext)),
            pad_style=pad_style,
            allow_subframes=allow_subframes,
        )
        for frange in self.frameRange().split(",")
    ]
[docs]
def dirname(self) -> str:
"""
Return the directory name of the sequence.
Returns:
str:
"""
return self._dir
[docs]
def setDirname(self, dirname: str) -> None:
    """
    Replace the directory portion of the sequence.

    The stored value always ends with a path separator; one is appended
    when ``dirname`` does not already end with it.

    Args:
        dirname (str): the new directory name
    """
    new_dir = utils.asString(dirname)
    separator = utils._getPathSep(new_dir)
    if new_dir.endswith(separator):
        self._dir = new_dir
    else:
        self._dir = str(new_dir) + separator
[docs]
def basename(self) -> str:
"""
Return the basename of the sequence.
Returns:
str: sequence basename
"""
return self._base
[docs]
def setBasename(self, base: str) -> None:
    """
    Replace the basename portion of the sequence.

    Args:
        base (str): the new base name
    """
    new_base = utils.asString(base)
    self._base = new_base
[docs]
def padStyle(self) -> constants._PadStyle:
"""
Return the padding style of the sequence.
See fileseq.constants.PAD_STYLE_HASH1 and fileseq.constants.PAD_STYLE_HASH4
Returns:
(`PAD_STYLE_DEFAULT` or `PAD_STYLE_HASH1` or `PAD_STYLE_HASH4`): padding style
"""
return self._pad_style
[docs]
def setPadStyle(self, pad_style: constants._PadStyle, set_zfill: bool = False) -> None:
"""
Set new padding style for the sequence.
See fileseq.constants.PAD_STYLE_HASH1 and fileseq.constants.PAD_STYLE_HASH4
The default behavior converts only the padding characters representation per the new style,
the same zfill/decimalPlaces value. If ``set_zfill=True``, convert the zfill/decimalPlaces
values to match the meaning of the padding characters per the new style.
Args:
pad_style (`PAD_STYLE_DEFAULT` or `PAD_STYLE_HASH1` or `PAD_STYLE_HASH4`): padding style to set
set_zfill (bool): If True, convert zfill/decimalPlaces value instead of padding chars
"""
if set_zfill:
zfill = self.getPaddingNum(self._frame_pad, pad_style=pad_style)
decimal_places = self.getPaddingNum(self._subframe_pad, pad_style=pad_style)
self._pad_style = pad_style
self._zfill = zfill
self._decimal_places = decimal_places
return
decimal_places = self._decimal_places
frame_pad = self.getPaddingChars(self._zfill, pad_style=pad_style)
if decimal_places:
subframe_pad = self.getPaddingChars(decimal_places, pad_style=pad_style)
pad = '.'.join([frame_pad, subframe_pad])
else:
subframe_pad = ''
pad = frame_pad
self._pad_style = pad_style
self._pad = pad
self._frame_pad = frame_pad
self._subframe_pad = subframe_pad
[docs]
def padding(self) -> str:
"""
Return the padding characters in the sequence.
Returns:
str: sequence padding
"""
return self._pad
[docs]
def setPadding(self, padding: str) -> None:
"""
Set new padding characters for the sequence.
i.e. "#" or "@@@" or '%04d', or an empty string to disable range formatting.
Args:
padding (str): sequence padding to set
Raises:
ValueError: if unrecognized padding characters are provided
"""
pad_style = self._pad_style
frame_pad, _, subframe_pad = padding.partition('.')
zfill = self.getPaddingNum(frame_pad, pad_style=pad_style)
decimal_places = self.getPaddingNum(subframe_pad, pad_style=pad_style)
# Set all fields atomically after parsing valid padding characters
self._pad = padding
self._frame_pad = frame_pad
self._subframe_pad = subframe_pad
self._zfill = zfill
self._decimal_places = decimal_places
[docs]
def framePadding(self) -> str:
"""
Return the padding characters in the sequence.
Returns:
str: sequence padding
"""
return self._frame_pad
[docs]
def setFramePadding(self, padding: str) -> None:
"""
Set new padding characters for the frames of the sequence.
i.e. "#" or "@@@" or '%04d', or an empty string to disable range formatting.
Args:
padding (str): sequence padding to set
Raises:
ValueError: if unrecognized padding characters are provided
"""
subframe_pad = self._subframe_pad
pad_style = self._pad_style
if subframe_pad:
pad = '.'.join([padding, subframe_pad])
else:
pad = padding
zfill = self.getPaddingNum(padding, pad_style=pad_style)
# Set all fields atomically after parsing valid padding characters
self._frame_pad = padding
self._pad = pad
self._zfill = zfill
[docs]
def subframePadding(self) -> str:
"""
Return the padding characters for subframes in the sequence.
Returns:
str: sequence padding
"""
return self._subframe_pad
[docs]
def setSubframePadding(self, padding: str) -> None:
"""
Set new padding characters for the subframes in the sequence.
i.e. "#" or "@@@", or an empty string to disable range
formatting.
Args:
padding (str): sequence padding to set
Raises:
ValueError: if unrecognized padding characters are provided
"""
frame_pad = self._frame_pad
subframe_pad = padding
pad_style = self._pad_style
if subframe_pad:
pad = '.'.join([frame_pad, subframe_pad])
else:
pad = frame_pad
decimal_places = self.getPaddingNum(subframe_pad, pad_style=pad_style)
# Set all fields atomically after parsing valid padding characters
self._subframe_pad = subframe_pad
self._pad = pad
self._decimal_places = decimal_places
[docs]
def frameSet(self) -> FrameSet|None:
"""
Return the :class:`.FrameSet` of the sequence if specified,
otherwise None.
Returns:
:class:`.FrameSet` or None:
"""
return self._frameSet
[docs]
def setFrameSet(self, frameSet: FrameSet|None) -> None:
"""
Set a new :class:`.FrameSet` for the sequence.
Args:
frameSet (:class:`.FrameSet`): the new :class:`.FrameSet` object
"""
if frameSet is not None and frameSet.hasSubFrames():
if all(isinstance(frame, decimal.Decimal) for frame in frameSet):
frameSet = FrameSet([
utils.quantize(frame, self._decimal_places)
for frame in frameSet
])
self._frameSet = frameSet
if not self._pad:
self.setPadding(self._DEFAULT_PAD_CHAR)
[docs]
def extension(self) -> str:
"""
Return the file extension of the sequence, including leading period.
Returns:
str:
"""
return self._ext
[docs]
def setExtension(self, ext: str) -> None:
    """
    Replace the file extension of the sequence.

    Note:
        A leading period will be added if none is provided. An empty
        extension is stored unchanged.

    Args:
        ext (str): the new file extension
    """
    if ext:
        if ext[0] != ".":
            ext = "." + ext
    self._ext = utils.asString(ext)
[docs]
def setExtention(self, ext: str) -> None:
"""
Deprecated: use :meth:`setExtension`.
Args:
ext (str):
"""
import warnings
msg = "the setExtention method is deprecated, please use setExtension"
warnings.warn(msg)
self.setExtension(ext)
[docs]
def frameRange(self) -> str:
"""
Returns the string formatted frame range of the sequence.
Will return an empty string if the sequence has no frame pattern.
Returns:
str:
"""
if not self._frameSet:
return ''
return self._frameSet.frameRange(self._zfill, self._decimal_places)
[docs]
def setFrameRange(self, frange: typing.Any) -> None:
    """
    Replace the frame range of the sequence.

    When the sequence has no padding yet, the default padding character
    is applied.

    Args:
        frange (str): a properly formatted frame range, as per :class:`.FrameSet`
    """
    new_frames = FrameSet(frange)
    self._frameSet = new_frames
    if self._pad:
        return
    self.setPadding(self._DEFAULT_PAD_CHAR)
[docs]
def invertedFrameRange(self) -> str:
"""
Returns the inverse string formatted frame range of the sequence.
Will return an empty string if the sequence has no frame pattern,
or the frame range includes subframes.
Returns:
str:
Raises:
:class:`fileseq.exceptions.MaxSizeException`: If new inverted range
exceeded ``fileseq.constants.MAX_FRAME_SIZE``
"""
if not self._frameSet or self._frameSet.hasSubFrames():
return ''
return self._frameSet.invertedFrameRange(self._zfill)
[docs]
def start(self) -> int:
"""
Returns the start frame of the sequence's :class:`.FrameSet`.
Will return 0 if the sequence has no frame pattern.
Returns:
int:
"""
if not self._frameSet:
return 0
return self._frameSet.start()
[docs]
def end(self) -> int:
"""
Returns the end frame of the sequences :class:`.FrameSet`.
Will return 0 if the sequence has no frame pattern.
Returns:
int:
"""
if not self._frameSet:
return 0
return self._frameSet.end()
[docs]
def zfill(self) -> int:
"""
Returns the zfill depth (ie the number of zeroes to pad with).
Returns:
int:
"""
return self._zfill
[docs]
def decimalPlaces(self) -> int:
"""
Returns the number of decimal places to output.
Returns:
int or None:
"""
return self._decimal_places
[docs]
def frame(self, frame: int|float|decimal.Decimal|str) -> str:
"""
Return a path for the given frame in the sequence. Numeric values or
numeric strings are treated as a frame number and padding is applied,
all other values are passed though.
Examples:
>>> seq = FileSequence('/foo/bar.1-10#.exr')
>>> seq.frame(1)
'/foo/bar.0001.exr'
>>> seq.frame("#")
'/foo/bar.#.exr'
Args:
frame (int, float, decimal.Decimal or str): the desired frame number
or a char to pass through (ie. #)
Returns:
str:
"""
zframe: object = None
if self._zfill == 0:
# There may have been no placeholder for frame IDs in
# the sequence, in which case we don't want to insert
# a frame ID
zframe = ""
else:
if not isinstance(frame, (int, float, decimal.Decimal)):
try:
frame = int(frame)
except ValueError:
try:
frame = decimal.Decimal(frame)
except decimal.DecimalException:
zframe = frame
if zframe is None:
zframe = utils.pad(frame, self._zfill, self._decimal_places)
return str("".join((self._dir, self._base, str(zframe), self._ext)))
[docs]
def index(self, idx: int) -> str:
"""
Return the path to the file at the given index.
Args:
idx (int): the desired index
Returns:
str:
"""
return self.__getitem__(idx) # type: ignore
[docs]
def batches(self, batch_size: int, paths: bool = False) -> typing.Iterable[str | FileSequence]:
    """
    Group the sequence into batches of at most ``batch_size`` files.

    If ``paths=False``, each batch is a new ``FileSequence`` subrange
    produced by slicing this sequence.
    If ``paths=True``, the result of
    ``fileseq.utils.batchIterable(self, batch_size)`` is returned, which
    batches the individual file paths.

    Args:
        batch_size (int): max file paths in each batch
        paths (bool): if True, generate individual file paths instead of FileSequences

    Returns:
        generator: yields batches of file paths or FileSequence subranges of sequence
    """
    total = len(self)
    if total == 0:
        return []

    if paths:
        # Batches of the individual file paths
        return utils.batchIterable(self, batch_size)

    # Batches of index ranges, each turned into a slice of this sequence
    index_ranges = utils.batchFrames(0, total - 1, batch_size)
    return (self[span.start:span.stop + 1] for span in index_ranges)
[docs]
def __setstate__(self, state: typing.Any) -> None:
    """
    Restore the instance from a pickled :class:`FileSequence` state.

    Fields that may be absent from the state (pad style, frame/subframe
    padding, decimal places) are filled with defaults.

    Args:
        state (dict): Pickle dictionary produced by default pickle implementation
    """
    attrs = self.__dict__
    attrs.update(state.items())

    attrs.setdefault('_pad_style', PAD_STYLE_DEFAULT)
    # Without a separate frame padding, the full padding is the frame padding
    attrs.setdefault('_frame_pad', self._pad)
    attrs.setdefault('_subframe_pad', '')
    attrs.setdefault('_decimal_places', 0)
[docs]
def to_dict(self) -> dict[str, typing.Any]:
"""
Convert sequence object into a state dict that is suitable for
further serialization, such as to JSON
Returns:
dict: state of the current sequence object
"""
state = self.__dict__.copy()
state['_pad_style'] = str(self._pad_style)
state['_frameSet'] = None
if self._frameSet is not None:
state['_frameSet'] = self._frameSet.__getstate__()
return state
[docs]
@classmethod
def from_dict(cls, state: dict[str, typing.Any]) -> FileSequence:
    """
    Constructor to create a new sequence object from a state
    that was previously returned by :meth:`FileSequence.to_dict`

    A ``_frameSet`` entry of None (which :meth:`to_dict` produces for a
    sequence without a frame set) is restored as None.

    Args:
        state (dict): state returned from :meth:`FileSequence.to_dict`

    Returns:
        :obj:`FileSequence`

    Raises:
        ValueError: if the stored pad style is not a known pad style
    """
    state = state.copy()

    # to_dict() stores None when the sequence has no frame set; only
    # rebuild a FrameSet when there is serialized state to rebuild from.
    frame_state = state.get('_frameSet')
    frameSet = None
    if frame_state is not None:
        frameSet = FrameSet.__new__(FrameSet)
        frameSet.__setstate__(tuple(frame_state))

    padStyle = constants._PadStyle(state['_pad_style'])
    if padStyle not in REVERSE_PAD_MAP:
        raise ValueError("bad pad style constant value %r" % padStyle)

    state['_pad_style'] = padStyle
    state['_frameSet'] = frameSet

    fs = cls.__new__(cls)
    fs.__setstate__(state)
    return fs
[docs]
def __iter__(self) -> collections.abc.Generator[str, None, None]:
    """
    Iterate over the path or paths this :class:`FileSequence`
    represents.

    A sequence without a frame set, or with a zfill of 0, yields only its
    own string form.

    Yields:
        str: path
    """
    frame_set = self._frameSet
    if frame_set and self._zfill:
        for frame_value in frame_set:
            yield self.frame(frame_value)
        return

    # No frame range or no padding: a single path
    yield utils.asString(self)
[docs]
def __getitem__(self, idx: typing.Any) -> str|FileSequence:
    """
    Index or slice into the underlying :class:`.FrameSet`.

    Indexing returns the file path of the selected frame. Slicing returns
    a new :class:`FileSequence` holding the selected frames. A sequence
    without a frame set returns its own string form.

    Args:
        idx (int or slice): the desired index

    Returns:
        str or :obj:`FileSequence`:

    Raises:
        :class:`IndexError`: If slice is outside the range of the sequence
    """
    frame_set = self._frameSet
    if not frame_set:
        return str(self)

    selected = frame_set[idx]

    # Anything without a ``start`` attribute is treated as a plain index
    if not hasattr(idx, 'start'):
        return self.frame(selected)

    subset = FrameSet(selected)
    if subset.is_null:
        raise IndexError("slice is out of range and returns no frames")

    sliced = self.copy()
    sliced.setFrameSet(subset)
    return sliced
[docs]
def __len__(self) -> int:
"""
The length (number of files) represented by this :class:`FileSequence`.
Returns:
int:
"""
if not self._frameSet or not self._zfill:
return 1
return len(self._frameSet)
[docs]
def __str__(self) -> str:
    """
    String form of this :class:`FileSequence`: directory, basename,
    frame range, padding and extension joined together.

    Note:
        A FileSequence that does not define a frame range will omit
        the padding character component when string formatted, even
        if the padding character is set.
        For more control over the exact string format, use the
        :obj:`FileSequence.format()` method.

    Returns:
        str:
    """
    parts = self.__components()
    parts.frameSet = utils.asString(parts.frameSet or "")
    return "".join(dataclasses.astuple(parts))
def __repr__(self) -> str:
try:
return "<%s: %r>" % (self.__class__.__name__, self.__str__())
except TypeError:
return super(self.__class__, self).__repr__()
def __eq__(self, other: typing.Any) -> bool:
    """
    Compare with another object.

    Against a non-:class:`FileSequence` the string forms are compared.
    Against another :class:`FileSequence` the components are compared,
    with the padding characters first converted to their numeric width.
    """
    if not isinstance(other, FileSequence):
        return str(self) == str(other)

    mine = self.__components()
    theirs = other.__components()
    # Compare padding by width rather than by the characters used
    mine.pad = self.getPaddingNum(str(mine.pad))
    theirs.pad = other.getPaddingNum(str(theirs.pad))
    return mine == theirs
def __ne__(self, other: typing.Any) -> bool:
return not self.__eq__(other)
def __hash__(self) -> int:
# TODO: Technically we should be returning None,
# as this class is mutable and cannot reliably be hashed.
# Python2 allows it without this definition.
# Python3 fails with TypeError: unhashable.
# For now, preserving the hashing behaviour in py3.
return id(self)
def __components(self) -> _Components:
return self._Components(
self._dir,
self._base,
self._frameSet or "",
self._pad if self._frameSet else "",
self._ext,
)
[docs]
@classmethod
def yield_sequences_in_list(
        cls,
        paths: typing.Iterable[str],
        using: FileSequence|None = None,
        pad_style: constants._PadStyle = PAD_STYLE_DEFAULT,
        allow_subframes: bool = False) -> typing.Iterator[FileSequence]:
    """
    Yield the discrete sequences within paths. This does not try to
    determine if the files actually exist on disk, it assumes you already
    know that.

    A template :obj:`FileSequence` object can also be provided via the
    ``using`` parameter. Given this template, the dirname, basename, and
    extension values will be used to extract the frame value from the paths
    instead of parsing each path from scratch. The frame is taken by
    position (after dirname + basename, before the extension); paths whose
    frame slice is not numeric are skipped.

    Frames are grouped by (dirname, basename, extension, number of
    subframe digits). Within a group, frames written with different
    padding widths may be split into separate sequences.

    Examples:
        The ``using`` field can supply a template for extracting the frame
        component from the paths::

            paths = [
                '/dir/file_001.0001.ext',
                '/dir/file_002.0001.ext',
                '/dir/file_003.0001.ext',
            ]
            template = FileSequence('/dir/file_#.0001.ext')
            seqs = FileSequence.yield_sequences_in_list(paths, template)
            # [<FileSequence: '/dir/file_1-3@@@.0001.ext'>]

    Args:
        paths (list[str]): a list of paths
        using (:obj:`FileSequence`): Optional sequence to use as template
        pad_style (`PAD_STYLE_DEFAULT` or `PAD_STYLE_HASH1` or `PAD_STYLE_HASH4`): padding style
        allow_subframes (bool): if True, handle subframe filenames

    Yields:
        :obj:`FileSequence`:
    """
    seqs: dict[tuple[str, str, str, int], set[str]] = {}
    if allow_subframes:
        _check = cls.DISK_SUB_RE.match
    else:
        _check = cls.DISK_RE.match

    if isinstance(using, FileSequence):
        dirname, basename, ext = using.dirname(), using.basename(), using.extension()
        head: int = len(dirname + basename)
        # With an empty extension the end bound must be None: a bound of
        # -0 == 0 would make every slice empty and drop every path.
        tail: int|None = -len(ext) if ext else None
        path: str

        for path in filter(None, map(utils.asString, paths)):
            frame = path[head:tail]
            try:
                int(frame)
            except ValueError:
                if not allow_subframes:
                    continue
                try:
                    decimal.Decimal(frame)
                except decimal.DecimalException:
                    continue
            _, _, subframe = frame.partition(".")
            key = (dirname, basename, ext, len(subframe))
            # One set per key, so frames with a different number of
            # subframe digits do not leak into each other's group.
            seqs.setdefault(key, set()).add(frame)

    else:
        for match in filter(None, map(_check, map(utils.asString, paths))):
            dirname, basename, frame, ext = match.groups()
            if not basename and not ext:
                continue
            if frame:
                _, _, subframe = frame.partition(".")
                key = (dirname, basename, ext, len(subframe))
            else:
                key = (dirname, basename, ext, 0)
            seqs.setdefault(key, set())
            if frame:
                seqs[key].add(frame)

    # The helpers below read dirname/basename/ext/pad_style from the
    # enclosing scope; the final loop rebinds them for each group.

    def start_new_seq() -> FileSequence:
        # Bare instance; __init__ is run later by finish_new_seq()
        seq = cls.__new__(cls)
        seq._dir = dirname or ''
        seq._base = basename or ''
        seq._ext = ext or ''
        return seq

    def finish_new_seq(seq: FileSequence) -> None:
        if seq._subframe_pad:
            seq._pad = '.'.join([seq._frame_pad, seq._subframe_pad])
        else:
            seq._pad = seq._frame_pad

        # _frameSet is already set, so __init__ skips parsing and only
        # derives the remaining fields (pad style, zfill, decimal places).
        seq.__init__(utils.asString(seq), pad_style=pad_style,  # type: ignore[misc]
                     allow_subframes=allow_subframes)

    def get_frame_width(frame_str: str) -> int:
        # number of characters in the whole-frame part
        frame_num, _, _ = frame_str.partition(".")
        return len(frame_num)

    def get_frame_minwidth(frame_str: str) -> int:
        # find the smallest padding width for a frame string
        frame_num, _, _ = frame_str.partition(".")
        size = len(frame_num)
        num = int(frame_num)
        num_size = len(str(num))
        if size == num_size:
            # no leading zeros: the frame fits any padding width
            return 1
        return size

    def frames_to_seq(frames: typing.Iterable[str], pad_length: int, decimal_places: int) -> FileSequence:
        seq = start_new_seq()
        seq._frameSet = FrameSet(sorted(decimal.Decimal(f) for f in frames))
        seq._frame_pad = cls.getPaddingChars(pad_length, pad_style=pad_style)
        if decimal_places:
            seq._subframe_pad = cls.getPaddingChars(decimal_places, pad_style=pad_style)
        else:
            seq._subframe_pad = ''
        finish_new_seq(seq)
        return seq

    for (dirname, basename, ext, decimal_places), frames in seqs.items():
        # Short-circuit logic if we do not have multiple frames, since we
        # only need to build and return a single simple sequence
        if not frames:
            seq = start_new_seq()
            seq._frameSet = None
            seq._frame_pad = ''
            seq._subframe_pad = ''
            finish_new_seq(seq)
            yield seq
            continue

        # If we have multiple frames, then we need to check them for different
        # padding and possibly yield more than one sequence.

        # sort the frame list by their string padding width
        sorted_frames = sorted(((get_frame_width(f), f) for f in frames), key=operator.itemgetter(0))

        current_frames: list[str] = []
        current_width = -1

        for width, frame in sorted_frames:
            # initialize on first item
            if current_width < 0:
                current_width = width

            if width != current_width and get_frame_minwidth(frame) > current_width:
                # We have a new padding length.
                # Commit the current sequence, and then start a new one.
                yield frames_to_seq(current_frames, current_width, decimal_places)

                # Start tracking the next group of frames using the new length
                current_frames = [frame]
                current_width = width
                continue

            current_frames.append(frame)

        # Commit the remaining frames as a sequence
        if current_frames:
            yield frames_to_seq(current_frames, current_width, decimal_places)
[docs]
@classmethod
def findSequencesInList(cls,
                        paths: typing.Iterable[str],
                        pad_style: constants._PadStyle = PAD_STYLE_DEFAULT,
                        allow_subframes: bool = False) -> list[FileSequence]:
    """
    Collect the discrete sequences within paths into a list. This does
    not try to determine if the files actually exist on disk, it assumes
    you already know that.

    This is the list-returning form of :meth:`yield_sequences_in_list`.

    Args:
        paths (list[str]): a list of paths
        pad_style (`PAD_STYLE_DEFAULT` or `PAD_STYLE_HASH1` or `PAD_STYLE_HASH4`): padding style
        allow_subframes (bool): if True, handle subframe filenames

    Returns:
        list:
    """
    found = cls.yield_sequences_in_list(
        paths, pad_style=pad_style, allow_subframes=allow_subframes
    )
    return list(found)
[docs]
@classmethod
def findSequencesOnDisk(
        cls,
        pattern: str,
        include_hidden: bool = False,
        strictPadding: bool = False,
        pad_style: constants._PadStyle = PAD_STYLE_DEFAULT,
        allow_subframes: bool = False) -> list[FileSequence]:
    """
    Return the sequences found in the given directory.

    Examples::

        FileSequence.findSequencesOnDisk('/path/to/files')

    The `pattern` can also specify glob-like shell wildcards including the following:
        * ``?``         - 1 wildcard character
        * ``*``         - 1 or more wildcard character
        * ``{foo,bar}`` - either 'foo' or 'bar' (any number of comma
          separated alternatives)

    Exact frame ranges are not considered, and padding characters are converted to
    wildcards (``#`` or ``@``)

    Case-sensitive matching follows POSIX behavior, even on Windows platforms.
    "file.1.png" and "file.2.PNG" result in two different sequences.

    Examples::

        FileSequence.findSequencesOnDisk('/path/to/files/image_stereo_{left,right}.#.jpg')
        FileSequence.findSequencesOnDisk('/path/to/files/imag?_*_{left,right}.@@@.jpg', strictPadding=True)

    Args:
        pattern (str): directory to scan, or pattern to filter in directory
        include_hidden (bool): if true, show .hidden files as well
        strictPadding (bool): if True, ignore files with padding length different from pattern
        pad_style (`PAD_STYLE_DEFAULT` or `PAD_STYLE_HASH1` or `PAD_STYLE_HASH4`): padding style
        allow_subframes (bool): if True, handle subframe filenames

    Returns:
        list: the sequences found; empty when the directory does not exist

    Raises:
        :class:`.FileSeqException`: if the file pattern cannot be compiled
            into a regular expression
    """
    # reserve some functions we're going to need quick access to
    _not_hidden = lambda f: not f.startswith('.')
    _match_pattern = None
    _filter_padding = None
    _join = os.path.join

    seq = None
    dirpath = pattern

    # Support the pattern defining a filter for the files
    # in the existing directory
    if not os.path.isdir(pattern):
        dirpath, filepat = os.path.split(pattern)

        if not os.path.isdir(dirpath):
            return []

        # Start building a regex for filtering files
        seq = cls(filepat, pad_style=pad_style, allow_subframes=allow_subframes)
        patt = r'\A'
        patt += cls._globCharsToRegex(seq.basename())
        if seq.padding():
            # Group 1 captures the frame text; strict padding reads it back
            patt += '('
            if seq.framePadding():
                patt += r'\d+'
                if seq.subframePadding():
                    patt += r'\.\d+'
            patt += ')'
        if seq.extension():
            patt += cls._globCharsToRegex(seq.extension())

        # Convert brace groups into non-capturing regex alternations.
        # The whole brace body is captured and split on commas, because a
        # repeated capture group only retains its last repetition and
        # would silently drop the middle alternatives of {a,b,c}.
        # Processed right to left so earlier spans stay valid.
        for match in reversed(list(re.finditer(r'\{(.*?)\}', patt))):
            i, j = match.span()
            options = '|'.join(opt.strip() for opt in match.group(1).split(','))
            patt = "".join((patt[0:i], '(?:%s)' % options, patt[j:]))
        patt += r'\Z'
        try:
            _match_pattern = re.compile(patt).match
        except re.error:
            msg = 'Invalid file pattern: {!r}'.format(filepat)
            raise FileSeqException(msg)

        if seq.padding() and strictPadding:
            get_frame = lambda f: _match_pattern(f).group(1)  # type: ignore
            _filter_padding = functools.partial(
                cls._filterByPaddingNum,
                zfill=seq.zfill(),
                decimal_places=seq.decimalPlaces(),
                get_frame=get_frame
            )

    # Get just the immediate files under the dir.
    # Avoids testing the os.listdir() for files as
    # a second step.
    ret = next(os.walk(dirpath), None)
    files: typing.Iterable[str] = ret[-1] if ret else []

    # collapse some generators to get us the files that match our regex
    if not include_hidden:
        files = filter(_not_hidden, files)

    # Filter by files that match the provided file pattern
    if _match_pattern:
        files = filter(_match_pattern, files)

    # Filter by files that match the frame padding in the file pattern
    if _filter_padding:
        # returns a generator
        files = _filter_padding(files)

    # Ensure our dirpath ends with a path separator, so
    # that we can control which sep is used during the
    # os.path.join
    sep = utils._getPathSep(dirpath)
    if not dirpath.endswith(sep):
        dirpath += sep

    files = [_join(dirpath, f) for f in files]

    seqs = list(
        cls.yield_sequences_in_list(files, pad_style=pad_style, allow_subframes=allow_subframes)
    )

    if _filter_padding and seq:
        frame_pad = cls.conformPadding(seq.framePadding(), pad_style=pad_style)
        subframe_pad = cls.conformPadding(seq.subframePadding(), pad_style=pad_style)
        # strict padding should preserve the original padding
        # characters in the found sequences.
        for s in seqs:
            s.setFramePadding(frame_pad)
            s.setSubframePadding(subframe_pad)

    return seqs
[docs]
@classmethod
def findSequenceOnDisk(
        cls,
        pattern: str,
        strictPadding: bool = False,
        pad_style: constants._PadStyle = PAD_STYLE_DEFAULT,
        allow_subframes: bool = False,
        force_case_sensitive: bool = True,
        preserve_padding: bool = False) -> FileSequence:
    """
    Search for a specific sequence on disk.

    The padding characters used in the `pattern` are used to filter the
    frame values of the files on disk (if `strictPadding` is True).

    Case-sensitive matching follows POSIX behavior, even on Windows platforms.
    "file.1.png" and "file.2.PNG" result in two different sequences.
    This behavior can be disabled on Windows by setting `force_case_sensitive=False`.

    By default, the returned sequence will use the "#@" padding character format.
    If ``preserve_padding=True``, then preserve the original `pattern` padding character
    format, as long as the padding length matches the existing sequence. In the case of
    ``strictPadding=False`` and the original padding length not matching the existing
    sequence, then the "#@" format will still be used in the result.

    If `pattern` has neither a frame range nor padding and names an
    existing file, the parsed pattern itself is returned.

    Examples:
        Find sequence matching basename and extension, and a wildcard for
        any frame.
        returns bar.1.exr bar.10.exr, bar.100.exr, bar.1000.exr, inclusive:

            ``FileSequence.findSequenceOnDisk("seq/bar@@@@.exr")``

        Find exactly 4-padded sequence, i.e. seq/bar1-100#.exr
        returns only frames bar1000.exr through bar9999.exr

            ``FileSequence.findSequenceOnDisk("seq/bar#.exr", strictPadding=True)``

        Find exactly 3-padded sequence, i.e. seq/bar1-3%03d.exr and
        return sequence that preserves the original printf padding format

            ``FileSequence.findSequenceOnDisk("seq/bar%03d.exr", strictPadding=True, preserve_padding=True)``

    Note:
        Unlike `findSequencesOnDisk`, general wildcard characters ("*", "?") are not
        supported and result in undefined behavior. Only the frame component of the paths may
        be replaced with padding characters to serve as a limited wildcard.

    Args:
        pattern (str): the sequence pattern being searched for
        strictPadding (bool): if True, ignore files with padding length different from `pattern`
        pad_style (`PAD_STYLE_DEFAULT` or `PAD_STYLE_HASH1` or `PAD_STYLE_HASH4`): padding style
        allow_subframes (bool): if True, handle subframe filenames
        force_case_sensitive (bool): force posix-style case-sensitive matching on Windows filesystems
        preserve_padding (bool): if True, preserve pattern-provided padding characters in returned
            sequence, if the padding length matches. Default: conform padding to "#@" style.

    Returns:
        FileSequence: A single matching file sequence existing on disk

    Raises:
        :class:`.FileSeqException`: if no sequence, or more than one
            sequence, is found on disk
    """
    seq = cls(pattern, allow_subframes=allow_subframes, pad_style=pad_style)

    # A plain existing file with no frame information is its own answer
    if seq.frameRange() == '' and seq.padding() == '':
        if os.path.isfile(pattern):
            return seq

    # Glob for everything sharing the dirname/basename prefix and extension
    patt = seq.format('{dirname}{basename}*{extension}')

    dirname = seq.dirname()
    basename = seq.basename()
    ext = seq.extension()
    pad = seq.padding()
    frame_pad = seq.framePadding()
    subframe_pad = seq.subframePadding()

    globbed = iglob(patt)
    if sys.platform == 'win32':
        # apply normpath in either case, as glob on windows could lead to
        # mixed path separators:  path/foo\\bar.ext
        normpath = os.path.normpath
        globbed = (normpath(p) for p in globbed)
        if force_case_sensitive:
            # windows: treat pattern matches as case-sensitive to align
            # with posix behavior
            patt = normpath(patt)
            case_match = re.compile(fnmatch.translate(patt)).match
            globbed = (p for p in globbed if case_match(p))

    # Filter object; its has_padded_frames / has_padded_subframes flags
    # are consulted further down when preserving padding
    pad_filter_ctx = cls._FilterByPaddingNum()

    if pad:
        # ``patt`` is rebuilt as a regex whose group 1 is the frame text
        # found between the basename and the extension
        patt = r'\A'
        if dirname:
            patt = r'.*[/\\]'
        patt += re.escape(basename) + '(.*)' + re.escape(ext) + r'\Z'

        def get_frame(f: str) -> str:
            # Extract the frame portion of a globbed path
            m = re.match(patt, f, re.I)
            if not m:
                raise ValueError(f'no frame match: str={f}, pattern={patt}')
            return m.group(1)

        if strictPadding:
            globbed = pad_filter_ctx(
                globbed,
                seq.zfill(),
                decimal_places=seq.decimalPlaces(),
                get_frame=get_frame
            )
            if not preserve_padding:
                frame_pad = cls.conformPadding(frame_pad, pad_style=pad_style)
                subframe_pad = cls.conformPadding(subframe_pad, pad_style=pad_style)
        else:
            # No zfill restriction is passed when padding is not strict
            globbed = pad_filter_ctx(
                globbed,
                None,
                decimal_places=seq.decimalPlaces(),
                get_frame=get_frame
            )

    sequences = []
    # From here on, subframe handling follows the parsed pattern rather
    # than the ``allow_subframes`` argument
    allow_subframes = bool(seq.decimalPlaces())
    for match in cls.yield_sequences_in_list(
            globbed, using=seq, pad_style=pad_style, allow_subframes=allow_subframes
    ):
        if match.basename() == basename and match.extension() == ext:
            if pad:
                if strictPadding:
                    # The match is already using the same padding width as the
                    # original pattern, so we can just propagate the padding
                    # characters. Depending on whether preserve_padding was set,
                    # the padding characters may or may not have been conformed
                    match.setFramePadding(frame_pad)
                    match.setSubframePadding(subframe_pad)

                elif preserve_padding:
                    # The match was not guaranteed to have the same padding
                    # width as the original pattern. But if we can check that
                    # they are equal then we can preserve the original padding
                    # characters
                    if seq.zfill() == match.zfill():
                        match.setFramePadding(frame_pad)
                    elif not pad_filter_ctx.has_padded_frames and seq.zfill() <= match.zfill():
                        match.setFramePadding(frame_pad)

                    if seq.decimalPlaces() == match.decimalPlaces():
                        match.setSubframePadding(subframe_pad)
                    elif not pad_filter_ctx.has_padded_subframes and seq.decimalPlaces() <= match.decimalPlaces():
                        match.setSubframePadding(subframe_pad)

            sequences.append(match)

    # Exactly one match is a success; zero or several is an error
    if len(sequences) == 1:
        return sequences[0]
    elif not sequences:
        msg = 'no sequence found on disk matching {0}'
    else:
        msg = 'multiple sequences found on disk matching {0}'

    raise FileSeqException(msg.format(pattern))
@staticmethod
def _globCharsToRegex(filename: str) -> str:
"""
Translate single character elements of a shell pattern to make suitable
for a regular expression pattern
Args:
filename (str): filename containing shell pattern to convert
Returns:
str:
"""
filename = filename.replace('.', r'\.')
filename = filename.replace('*', '.*')
filename = filename.replace('?', '.')
return filename
class _FilterByPaddingNum(object):
    """
    Callable filter that keeps only the paths whose frame padding is
    compatible with a target padding width.

    While the generator returned by calling an instance is consumed, the
    ``has_padded_frames`` and ``has_padded_subframes`` attributes record
    whether any yielded path had a zero-led frame or subframe component.
    """

    def __init__(self) -> None:
        # Flags describing the frames yielded so far:
        #   padded:     file.0001.ext
        #   not padded: file.1001.ext
        self.has_padded_frames = False
        self.has_padded_subframes = False

    def __call__(self,
                 iterable: typing.Iterable[str],
                 zfill: int|None,
                 decimal_places: typing.Optional[int] = 0,
                 get_frame: typing.Optional[typing.Callable[[str], str]] = None
                 ) -> collections.abc.Generator[str, None, None]:
        """
        Yield only path elements from iterable which have a frame padding that
        matches the given target padding numbers. If zfill is None only the
        subframe length is matched against decimal places. If provided get_frame
        should be a callable taking one argument that will extract the frame
        number from a filename.

        Args:
            iterable (collections.abc.Iterable): paths to filter
            zfill (int or None): target frame padding width, or None to
                accept any frame width
            decimal_places (int): required number of subframe digits
            get_frame (callable): optional frame extractor, only consulted
                for paths that the disk pattern already parsed a frame from

        Yields:
            str: each path that passes the padding filter
        """
        if decimal_places == 0:
            parse_path = FileSequence.DISK_RE.match
        else:
            parse_path = FileSequence.DISK_SUB_RE.match

        # Every new run starts with a clean slate
        self.has_padded_frames = False
        self.has_padded_subframes = False

        def leads_with_zero(digits: str) -> bool:
            # '0...' or '-0...' marks an explicitly zero padded number
            return digits.startswith(('0', '-0'))

        # Paths without a usable frame are only accepted when no positive
        # padding width was requested
        accept_frameless = zfill is None or zfill <= 0

        for path in iterable:
            parsed = parse_path(path)

            # The disk pattern must produce a frame before the optional
            # get_frame callable is consulted
            frame_text = (parsed.group(3) or '') if parsed else ''
            if frame_text and get_frame is not None:
                frame_text = get_frame(path) or ''

            if not frame_text:
                # Either not a sequence path at all, or no frame value
                # could be extracted from it
                if accept_frameless:
                    yield path
                continue

            whole, _, fraction = frame_text.partition(".")
            if len(fraction) != decimal_places:
                continue

            frame_is_padded = leads_with_zero(whole)
            subframe_is_padded = leads_with_zero(fraction)

            if zfill is None:
                # Not filtering by a specific frame width
                keep = True
            elif frame_is_padded:
                # A zero-led frame is explicitly padded, so it can only
                # match the exact target width
                keep = len(whole) == zfill
            else:
                # A frame without a leading zero may be wider than the
                # target width and still match
                keep = len(whole) >= zfill

            if not keep:
                continue

            # Record the padding state before handing the path out, so the
            # flags are current by the time the consumer sees the item
            if frame_is_padded:
                self.has_padded_frames = True
            if subframe_is_padded:
                self.has_padded_subframes = True
            yield path
@classmethod
def _filterByPaddingNum(cls, *args, **kwargs) -> typing.Generator[str]: # type: ignore
    """
    Run a one-off padding filter.

    Builds a fresh ``_FilterByPaddingNum`` and calls it with the given
    arguments. The filter object is not returned, so its ``has_padded_*``
    flags are not available to the caller.

    Returns:
        generator yielding the paths accepted by the filter
    """
    return cls._FilterByPaddingNum()(*args, **kwargs)
[docs]
@classmethod
def getPaddingChars(cls, num: int, pad_style: constants._PadStyle = PAD_STYLE_DEFAULT) -> str:
    """
    Build the padding character string that represents a given padding width.

    Widths smaller than 1 are treated as 1. The widest padding character of
    the style whose width evenly divides the requested width is chosen and
    repeated as often as needed.

    Args:
        num (int): required width of string with padding
        pad_style (`PAD_STYLE_DEFAULT` or `PAD_STYLE_HASH1` or `PAD_STYLE_HASH4`): padding style

    Returns:
        str: the padding characters

    Raises:
        :class:`.FileSeqException`: if no character width of the style
            divides the requested width
    """
    target_width = max(1, num)
    chars_by_width = cls.REVERSE_PAD_MAP[pad_style]

    # Try the widest characters first so the result is as short as possible
    for char_width in sorted(chars_by_width, reverse=True):
        repeats, leftover = divmod(target_width, char_width)
        if leftover == 0:
            return chars_by_width[char_width] * repeats

    # Should never reach here as all styles should have an entry for width 1
    raise FileSeqException('REVERSE_PAD_MAP missing pad character for width 1')
[docs]
@classmethod
def getPaddingNum(cls, chars: str, pad_style: constants._PadStyle = PAD_STYLE_DEFAULT) -> int:
    """
    Given a supported group of padding characters, return the amount of padding.

    Resolution order:

    1. An empty string has a padding of 0.
    2. Any string found in ``UDIM_PADDING_PATTERNS`` has a padding of 4.
    3. A string matching the printf or houdini padding patterns uses the
       width captured in the pattern's first group (at least 1; 1 when the
       group is empty).
    4. Otherwise the widths of the individual padding characters, looked up
       in ``PAD_MAP`` for the given style, are summed.

    Args:
        chars (str): a supported group of padding characters
        pad_style (`PAD_STYLE_DEFAULT` or `PAD_STYLE_HASH1` or `PAD_STYLE_HASH4`): padding style

    Returns:
        int: the padding width

    Raises:
        ValueError: if unsupported padding character is detected, or if
            ``pad_style`` has no width defined for a padding character
    """
    if not chars:
        return 0

    if chars in UDIM_PADDING_PATTERNS:
        return 4

    match = PRINTF_SYNTAX_PADDING_RE.match(chars) or HOUDINI_SYNTAX_PADDING_RE.match(chars)
    if match:
        paddingNumStr = match.group(1)
        paddingNum = int(paddingNumStr) if paddingNumStr else 1
        return max(paddingNum, 1)

    rval = 0
    for char in chars:
        # The character and style lookups are kept apart so that each
        # failure is reported for what it is; a bad pad_style must not be
        # blamed on a perfectly valid padding character.
        try:
            style_widths = cls.PAD_MAP[char]
        except KeyError as err:
            msg = "Detected an unsupported padding character: \"{}\"."
            msg += " Supported padding characters: {}, printf, houdini or UDIM syntax padding"
            msg += " %<int>d"
            raise ValueError(msg.format(char, utils.asString(list(cls.PAD_MAP)))) from err
        try:
            rval += style_widths[pad_style]
        except KeyError as err:
            raise ValueError(
                f"Unsupported pad_style {pad_style!r} for padding character \"{char}\"."
            ) from err
    return rval