Skip to content

Commit

Permalink
ignore typical temporary files when listing directories
Browse files Browse the repository at this point in the history
Operating systems love to clutter the file system with all kinds of
cruft. This adds a gitignore-like config to skip those files when
listing files.
  • Loading branch information
splitbrain committed Mar 13, 2024
1 parent 5a77ba3 commit d6a45e5
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 0 deletions.
37 changes: 37 additions & 0 deletions Crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ class Crawler
/** @var bool */
protected $sortreverse = false;

/** @var string[] patterns to ignore */
protected $ignore = [];

/**
* Initializes the crawler
*
Expand All @@ -24,6 +27,8 @@ public function __construct($extensions)
$this->ext = array_map('trim', $this->ext);
$this->ext = array_map('preg_quote_cb', $this->ext);
$this->ext = implode('|', $this->ext);

$this->ignore = $this->loadIgnores();
}

public function setSortBy($sortby)
Expand Down Expand Up @@ -67,6 +72,9 @@ public function crawl($root, $local, $pattern, $recursive, $titlefile)
if (!is_dir($filepath) && !$this->isExtensionAllowed($file)) {
continue;
}
if ($this->isFileIgnored($file)) {
continue;
}

// get title file
$filename = $file;
Expand Down Expand Up @@ -141,6 +149,35 @@ protected function isExtensionAllowed($file)
return preg_match('/(' . $this->ext . ')$/i', $file);
}

/**
 * Decide whether a file name matches any of the loaded ignore patterns
 *
 * Each entry of $this->ignore is tested against the given name using the
 * class' own fnmatch() implementation; the first match wins.
 *
 * @param string $file the name to test
 * @return bool true if at least one pattern matches, false otherwise
 */
protected function isFileIgnored($file)
{
    $ignored = false;
    foreach ($this->ignore as $glob) {
        if ($this->fnmatch($glob, $file)) {
            // one hit is enough, no need to look at the remaining patterns
            $ignored = true;
            break;
        }
    }
    return $ignored;
}

/**
 * Load the ignore patterns from the conf/ignore.txt file
 *
 * Everything from a '#' (including whitespace directly before it) up to the
 * end of the line is treated as a comment and removed. Lines that are empty
 * after stripping comments and surrounding whitespace are skipped.
 *
 * A missing or unreadable file results in an empty pattern list instead of
 * an error, so the crawler keeps working without any ignore rules.
 *
 * @return string[] list of patterns, consecutively indexed
 */
protected function loadIgnores()
{
    $file = __DIR__ . '/conf/ignore.txt';

    // file() returns false (and raises a warning) on failure; guard against
    // that so we never hand a non-array to the processing below
    if (!is_readable($file)) return [];
    $lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($lines === false) return [];

    $ignore = [];
    foreach ($lines as $line) {
        // preg_replace() yields null on a PCRE error, cast to keep trim() happy
        $pattern = trim((string) preg_replace('/\s*#.*$/', '', $line));

        // compare against '' explicitly: a plain truthiness check would also
        // discard the valid pattern "0"
        if ($pattern !== '') $ignore[] = $pattern;
    }
    return $ignore;
}

/**
* Replacement for fnmatch() for windows systems.
Expand Down
Empty file.
85 changes: 85 additions & 0 deletions conf/ignore.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# This is a gitignore style file to ignore typical temporary files and directories

### Linux ###
*~

# temporary files which can be created if a process still has an open handle to a deleted file
.fuse_hidden*

# KDE directory preferences
.directory

# Linux trash folder which might appear on any partition or disk
.Trash-*

# .nfs files are created when an open file is removed but is still being accessed
.nfs*

### macOS ###
# General
.DS_Store
.AppleDouble
.LSOverride


# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

# iCloud generated files
*.icloud

### MicrosoftOffice ###
*.tmp

# Word temporary
~$*.doc*

# Word Auto Backup File
Backup of *.doc*

# Excel temporary
~$*.xls*

# Excel Backup File
*.xlk

# PowerPoint temporary
~$*.ppt*

# Visio autosave temporary files
*.~vsd*

### Windows ###
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db

# Dump file
*.stackdump

# Folder config file
[Dd]esktop.ini

# Recycle Bin used on file shares
$RECYCLE.BIN

# Windows shortcuts
*.lnk

0 comments on commit d6a45e5

Please sign in to comment.