Skip to content

Commit

Permalink
Implement Unicode support by utilizing PosixString and friends
Browse files Browse the repository at this point in the history
Fixes #78
  • Loading branch information
hasufell committed Jan 7, 2024
1 parent b684654 commit 28aa81c
Show file tree
Hide file tree
Showing 28 changed files with 695 additions and 624 deletions.
12 changes: 1 addition & 11 deletions .github/workflows/haskell-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,6 @@ jobs:
compilerVersion: 8.8.4
setup-method: hvr-ppa
allow-failure: false
- compiler: ghc-8.6.5
compilerKind: ghc
compilerVersion: 8.6.5
setup-method: hvr-ppa
allow-failure: false
- compiler: ghc-8.4.4
compilerKind: ghc
compilerVersion: 8.4.4
setup-method: hvr-ppa
allow-failure: false
fail-fast: false
steps:
- name: apt
Expand Down Expand Up @@ -211,7 +201,7 @@ jobs:
echo " ghc-options: -Werror=missing-methods" >> cabal.project
cat >> cabal.project <<EOF
EOF
$HCPKG list --simple-output --names-only | perl -ne 'for (split /\s+/) { print "constraints: $_ installed\n" unless /^(bytestring|directory|htar|tar|unix)$/; }' >> cabal.project.local
$HCPKG list --simple-output --names-only | perl -ne 'for (split /\s+/) { print "constraints: $_ installed\n" unless /^(bytestring|directory|htar|tar|unix|filepath)$/; }' >> cabal.project.local
cat cabal.project
cat cabal.project.local
- name: dump install plan
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/i386.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ jobs:
run: |
source ~/.ghcup/env
cabal update
cabal test --test-show-details=direct
cabal test --enable-tests --test-show-details=direct
30 changes: 15 additions & 15 deletions Codec/Archive/Tar.hs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,6 @@ module Codec.Archive.Tar (
FormatError(..),
) where

import Codec.Archive.Tar.Check
import Codec.Archive.Tar.Entry
import Codec.Archive.Tar.Index (hSeekEndEntryOffset)
import Codec.Archive.Tar.LongNames (decodeLongNames, encodeLongNames, DecodeLongNamesError(..))
Expand All @@ -174,12 +173,13 @@ import Codec.Archive.Tar.Types (unfoldEntries, foldlEntries, foldEntries, mapEnt
import Codec.Archive.Tar.Unpack (unpack, unpackAndCheck)
import Codec.Archive.Tar.Write (write)

import Control.Applicative ((<|>))
import Control.Exception (Exception, throw, catch, SomeException(..))
import qualified Data.ByteString.Lazy as BL
import System.IO (withFile, IOMode(..))
import System.IO (IOMode(..))
import Prelude hiding (read)

import System.OsPath (OsPath)
import qualified System.File.OsPath as OSP

-- | Create a new @\".tar\"@ file from a directory of files.
--
-- It is equivalent to calling the standard @tar@ program like so:
Expand Down Expand Up @@ -213,11 +213,11 @@ import Prelude hiding (read)
--
-- * @rwxr-xr-x@ for directories
--
create :: FilePath -- ^ Path of the \".tar\" file to write.
-> FilePath -- ^ Base directory
-> [FilePath] -- ^ Files and directories to archive, relative to base dir
create :: OsPath -- ^ Path of the \".tar\" file to write.
-> OsPath -- ^ Base directory
-> [OsPath] -- ^ Files and directories to archive, relative to base dir
-> IO ()
-- Runs 'pack' on the base directory and paths, feeds the result through
-- 'write', and writes that output to the tar file.
create tar base paths = BL.writeFile tar . write =<< pack base paths
create tar base paths = OSP.writeFile tar . write =<< pack base paths

-- | Extract all the files contained in a @\".tar\"@ file.
--
Expand Down Expand Up @@ -249,22 +249,22 @@ create tar base paths = BL.writeFile tar . write =<< pack base paths
-- containing entries that point outside of the tarball (either absolute paths
-- or relative paths) will be caught and an exception will be thrown.
--
extract :: FilePath -- ^ Destination directory
-> FilePath -- ^ Tarball
extract :: OsPath -- ^ Destination directory
-> OsPath -- ^ Tarball
-> IO ()
-- Reads the tarball's contents, passes them through 'read', and hands the
-- result to 'unpack' together with the destination directory.
extract dir tar = unpack dir . read =<< BL.readFile tar
extract dir tar = unpack dir . read =<< OSP.readFile tar

-- | Append new entries to a @\".tar\"@ file from a directory of files.
--
-- This is much like 'create', except that all the entries are added to the
-- end of an existing tar file. Or if the file does not already exist then
-- it behaves the same as 'create'.
--
append :: FilePath -- ^ Path of the \".tar\" file to write.
-> FilePath -- ^ Base directory
-> [FilePath] -- ^ Files and directories to archive, relative to base dir
append :: OsPath -- ^ Path of the \".tar\" file to write.
-> OsPath -- ^ Base directory
-> [OsPath] -- ^ Files and directories to archive, relative to base dir
-> IO ()
-- Opens the tar file in 'ReadWriteMode' and calls 'hSeekEndEntryOffset' on
-- the handle, discarding its result, before writing the newly packed entries
-- to that same handle. Presumably the seek leaves the handle positioned just
-- past the last existing entry; TODO confirm in "Codec.Archive.Tar.Index".
append tar base paths =
withFile tar ReadWriteMode $ \hnd -> do
OSP.withFile tar ReadWriteMode $ \hnd -> do
_ <- hSeekEndEntryOffset hnd Nothing
BL.hPut hnd . write =<< pack base paths
141 changes: 87 additions & 54 deletions Codec/Archive/Tar/Check/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE MultiWayIf #-}
{-# OPTIONS_GHC -Wno-orphans #-}
{-# OPTIONS_HADDOCK hide #-}
-----------------------------------------------------------------------------
Expand Down Expand Up @@ -40,15 +42,19 @@ module Codec.Archive.Tar.Check.Internal (
import Codec.Archive.Tar.LongNames
import Codec.Archive.Tar.Types
import Control.Applicative ((<|>))
import qualified Data.ByteString.Lazy.Char8 as Char8
import Data.Maybe (fromMaybe)
import Data.Typeable (Typeable)
import Control.Exception (Exception(..))
import qualified System.FilePath as FilePath.Native
( splitDirectories, isAbsolute, isValid, (</>), takeDirectory, hasDrive )

import qualified System.FilePath.Windows as FilePath.Windows
import qualified System.FilePath.Posix as FilePath.Posix
import System.OsPath (OsPath)
import System.OsPath.Posix (PosixPath)
import qualified System.OsPath as OSP
import qualified System.OsPath.Posix as PFP
import qualified System.OsPath.Windows as WFP

import System.OsString.Posix (pstr)
import System.OsString (osstr)
import qualified System.OsString.Posix as PS
import qualified System.OsString.Windows as WS


--------------------------
Expand Down Expand Up @@ -78,57 +84,79 @@ import qualified System.FilePath.Posix as FilePath.Posix
-- such as exhaustion of file handles.
checkSecurity
:: Entries e
-> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) FileNameError)
-> GenEntries PosixPath PosixPath (Either (Either e DecodeLongNamesError) FileNameError)
-- Long names are decoded first; 'checkEntrySecurity' is then applied to the
-- decoded entries through 'checkEntries'.
checkSecurity = checkEntries checkEntrySecurity . decodeLongNames

-- | Worker of 'Codec.Archive.Tar.Check.checkSecurity'.
--
-- @since 0.6.0.0
checkEntrySecurity :: GenEntry FilePath FilePath -> Maybe FileNameError
checkEntrySecurity :: GenEntry PosixPath PosixPath -> Maybe FileNameError
-- The entry's own tar path is checked first; because '<|>' on 'Maybe' keeps
-- the first 'Just', link targets are only consulted when that check passes.
checkEntrySecurity e =
check (entryTarPath e) <|>
case entryContent e of
-- A hard link target is checked exactly as given.
HardLink link ->
check link
-- A symbolic link target is first joined onto the directory of the entry
-- that contains it, and the combined path is checked.
SymbolicLink link ->
check (FilePath.Posix.takeDirectory (entryTarPath e) FilePath.Posix.</> link)
check (PFP.takeDirectory (entryTarPath e) PFP.</> link)
_ -> Nothing
where
-- Checks under POSIX path rules: rejects absolute paths, then invalid
-- paths, then paths whose components step outside the base directory.
checkPosix :: PosixPath -> Maybe FileNameError
checkPosix name
| FilePath.Posix.isAbsolute name
= Just $ AbsoluteFileName name
| not (FilePath.Posix.isValid name)
= Just $ InvalidFileName name
| not (isInsideBaseDir (FilePath.Posix.splitDirectories name))
= Just $ UnsafeLinkTarget name
| otherwise = Nothing

checkNative (fromFilePathToNative -> name)
| FilePath.Native.isAbsolute name || FilePath.Native.hasDrive name
| PFP.isAbsolute name
= Just $ AbsoluteFileName name
| not (FilePath.Native.isValid name)
| not (PFP.isValid name)
= Just $ InvalidFileName name
| not (isInsideBaseDir (FilePath.Native.splitDirectories name))
| not (isInsideBaseDir (PFP.splitDirectories name))
= Just $ UnsafeLinkTarget name
| otherwise = Nothing

check name = checkPosix name <|> checkNative (fromFilePathToNative name)

isInsideBaseDir :: [FilePath] -> Bool
-- Repeats the same three checks on the path converted with 'fromPosixPath',
-- using the "System.OsPath" functions and additionally treating a path that
-- has a drive as absolute. Errors carry the original, unconverted path.
checkNative :: PosixPath -> Maybe FileNameError
checkNative name'
| OSP.isAbsolute name || OSP.hasDrive name
= Just $ AbsoluteFileName name'
| not (OSP.isValid name)
= Just $ InvalidFileName name'
| not (isInsideBaseDir' (OSP.splitDirectories name))
= Just $ UnsafeLinkTarget name'
| otherwise
= Nothing
where
name = fromPosixPath name'

-- POSIX rules are applied first; the native check only runs if they pass.
check name = checkPosix name <|> checkNative name

-- Walks the path components while tracking how many directory levels below
-- the base the walk currently is: ".." at level 0 would leave the base
-- directory (result 'False'), ".." elsewhere goes up one level, "." stays
-- put, and any other component goes down one level. Reaching the end of the
-- list means the path never left the base directory.
isInsideBaseDir :: [PosixPath] -> Bool
isInsideBaseDir = go 0
where
go :: Word -> [FilePath] -> Bool
go :: Word -> [PosixPath] -> Bool
go !_ [] = True
go 0 (x : _)
| x == [pstr|..|] = False
go lvl (x : xs)
| x == [pstr|..|] = go (lvl - 1) xs
go lvl (x : xs)
| x == [pstr|.|] = go lvl xs
go lvl (_ : xs) = go (lvl + 1) xs

-- Same walk as 'isInsideBaseDir', over 'OsPath' components.
isInsideBaseDir' :: [OsPath] -> Bool
isInsideBaseDir' = go 0
where
go :: Word -> [OsPath] -> Bool
go !_ [] = True
go 0 (".." : _) = False
go lvl (".." : xs) = go (lvl - 1) xs
go lvl ("." : xs) = go lvl xs
go 0 (x : _)
| x == [osstr|..|] = False
go lvl (x : xs)
| x == [osstr|..|] = go (lvl - 1) xs
go lvl (x : xs)
| x == [osstr|.|] = go lvl xs
go lvl (_ : xs) = go (lvl + 1) xs

-- | Errors arising from tar file names being in some way invalid or dangerous
data FileNameError
-- Every constructor carries the offending path; 'showFileNameError' turns
-- each one into a message. Note that, despite its name, 'UnsafeLinkTarget'
-- is also what 'checkEntrySecurity' returns when an entry's own path (not
-- just a link target) steps outside the base directory.
= InvalidFileName FilePath
| AbsoluteFileName FilePath
| UnsafeLinkTarget FilePath
= InvalidFileName PosixPath
| AbsoluteFileName PosixPath
| UnsafeLinkTarget PosixPath
| FileNameDecodingFailure PosixPath
-- ^ @since 0.6.0.0
deriving (Typeable)

Expand All @@ -142,6 +170,7 @@ showFileNameError mb_plat err = case err of
InvalidFileName path -> "Invalid" ++ plat ++ " file name in tar archive: " ++ show path
AbsoluteFileName path -> "Absolute" ++ plat ++ " file name in tar archive: " ++ show path
UnsafeLinkTarget path -> "Unsafe" ++ plat ++ " link target in tar archive: " ++ show path
FileNameDecodingFailure path -> "Decoding failure of path " ++ show path
where plat = maybe "" (' ':) mb_plat


Expand All @@ -167,17 +196,17 @@ showFileNameError mb_plat err = case err of
-- Not only is it faster, but it also alleviates issues with lazy I/O
-- such as exhaustion of file handles.
checkTarbomb
:: FilePath
:: PosixPath
-> Entries e
-> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) TarBombError)
-> GenEntries PosixPath PosixPath (Either (Either e DecodeLongNamesError) TarBombError)
-- Long names are decoded first; 'checkEntryTarbomb', given the expected top
-- directory, is then applied to the decoded entries through 'checkEntries'.
checkTarbomb expectedTopDir
= checkEntries (checkEntryTarbomb expectedTopDir)
. decodeLongNames

-- | Worker of 'checkTarbomb'.
--
-- @since 0.6.0.0
checkEntryTarbomb :: FilePath -> GenEntry FilePath linkTarget -> Maybe TarBombError
checkEntryTarbomb :: PosixPath -> GenEntry PosixPath linkTarget -> Maybe TarBombError
checkEntryTarbomb expectedTopDir entry = do
case entryContent entry of
-- Global extended header aka XGLTYPE aka pax_global_header
Expand All @@ -186,18 +215,18 @@ checkEntryTarbomb expectedTopDir entry = do
-- Extended header referring to the next file in the archive aka XHDTYPE
OtherEntryType 'x' _ _ -> Nothing
_ ->
case FilePath.Posix.splitDirectories (entryTarPath entry) of
case PFP.splitDirectories (entryTarPath entry) of
(topDir:_) | topDir == expectedTopDir -> Nothing
_ -> Just $ TarBombError expectedTopDir (entryTarPath entry)

-- | An error that occurs if a tar file is a \"tar bomb\" that would extract
-- files outside of the intended directory.
data TarBombError
= TarBombError
FilePath -- ^ Expected top directory.
PosixPath -- ^ Expected top directory.
FilePath -- ^ Path inside archive.
PosixPath -- ^ Path inside archive.
--
-- @since 0.6.0.0
deriving (Typeable)

instance Exception TarBombError
Expand Down Expand Up @@ -236,43 +265,44 @@ instance Show TarBombError where
-- such as exhaustion of file handles.
checkPortability
:: Entries e
-> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) PortabilityError)
-> GenEntries PosixPath PosixPath (Either (Either e DecodeLongNamesError) PortabilityError)
-- Long names are decoded first; 'checkEntryPortability' is then applied to
-- the decoded entries through 'checkEntries'.
checkPortability = checkEntries checkEntryPortability . decodeLongNames

-- | Worker of 'checkPortability'.
--
-- @since 0.6.0.0
checkEntryPortability :: GenEntry FilePath linkTarget -> Maybe PortabilityError
checkEntryPortability :: GenEntry PosixPath linkTarget -> Maybe PortabilityError
checkEntryPortability entry
| entryFormat entry `elem` [V7Format, GnuFormat]
= Just $ NonPortableFormat (entryFormat entry)

| not (portableFileType (entryContent entry))
= Just NonPortableFileType

| not (all portableChar posixPath)
| not (PS.all portableChar posixPath)
= Just $ NonPortableEntryNameChar posixPath

| not (FilePath.Posix.isValid posixPath)
| not (PFP.isValid posixPath)
= Just $ NonPortableFileName "unix" (InvalidFileName posixPath)
| not (FilePath.Windows.isValid windowsPath)
= Just $ NonPortableFileName "windows" (InvalidFileName windowsPath)
| not (WFP.isValid windowsPath)
= Just $ NonPortableFileName "windows" (InvalidFileName posixPath)

| FilePath.Posix.isAbsolute posixPath
| PFP.isAbsolute posixPath
= Just $ NonPortableFileName "unix" (AbsoluteFileName posixPath)
| FilePath.Windows.isAbsolute windowsPath
= Just $ NonPortableFileName "windows" (AbsoluteFileName windowsPath)
| WFP.isAbsolute windowsPath
= Just $ NonPortableFileName "windows" (AbsoluteFileName posixPath)

| any (=="..") (FilePath.Posix.splitDirectories posixPath)
| any (== [PS.pstr|..|]) (PFP.splitDirectories posixPath)
= Just $ NonPortableFileName "unix" (InvalidFileName posixPath)
| any (=="..") (FilePath.Windows.splitDirectories windowsPath)
= Just $ NonPortableFileName "windows" (InvalidFileName windowsPath)
| any (== [WS.pstr|..|]) (WFP.splitDirectories windowsPath)
= Just $ NonPortableFileName "windows" (InvalidFileName posixPath)

| otherwise = Nothing
| otherwise
= Nothing

where
posixPath = entryTarPath entry
windowsPath = fromFilePathToWindowsPath posixPath
posixPath = entryTarPath entry
windowsPath = toWindowsPath posixPath

portableFileType ftype = case ftype of
NormalFile {} -> True
Expand All @@ -281,14 +311,15 @@ checkEntryPortability entry
Directory -> True
_ -> False

portableChar c = c <= '\127'
portableChar c = PS.toChar c <= '\127'

-- | Portability problems in a tar archive
data PortabilityError
-- The entry's format is one 'checkEntryPortability' rejects
-- ('V7Format' or 'GnuFormat'); the format is carried along.
= NonPortableFormat Format
-- The entry's content is not one of the file types accepted as portable.
| NonPortableFileType
-- The entry name contains a character above '\127'.
| NonPortableEntryNameChar FilePath
| NonPortableEntryNameChar PosixPath
-- The entry name is rejected for the named platform; the 'FileNameError'
-- gives the reason.
| NonPortableFileName PortabilityPlatform FileNameError
-- Rendered by the 'Show' instance as "Decoding failure of path ...";
-- NOTE(review): where this is raised is not visible here, confirm.
| NonPortableDecodingFailure PosixPath
deriving (Typeable)

-- | The name of a platform that portability issues arise from
Expand All @@ -306,6 +337,8 @@ instance Show PortabilityError where
= "Non-portable character in archive entry name: " ++ show posixPath
show (NonPortableFileName platform err)
= showFileNameError (Just platform) err
show (NonPortableDecodingFailure posixPath)
= "Decoding failure of path " ++ show posixPath

--------------------------
-- Utils
Expand Down
Loading

0 comments on commit 28aa81c

Please sign in to comment.