-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Replace functionality of csv with streamly #32
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,9 +57,15 @@ import Debug.Trace (trace) | |
import Statistics.Types (Estimate(..), ConfInt(..)) | ||
import System.Directory (createDirectoryIfMissing) | ||
import System.FilePath ((</>)) | ||
import Text.CSV (CSV, parseCSVFromFile) | ||
import System.IO (IOMode(..)) | ||
import Text.Read (readMaybe) | ||
|
||
import qualified Streamly.Data.Fold as Fold | ||
import qualified Streamly.FileSystem.Handle as Handle | ||
import qualified Streamly.Prelude as Stream | ||
import qualified Streamly.Unicode.Stream as Unicode | ||
import qualified System.IO as IO | ||
|
||
import BenchShow.Analysis | ||
|
||
------------------------------------------------------------------------------- | ||
|
@@ -81,6 +87,28 @@ filterSanity label old new = do | |
++ "\nOriginal groups: " ++ show old | ||
++ "\nNew groups: " ++ show new | ||
|
||
type CSV = [[String]] | ||
|
||
-- XXX This is ugly in performance but works for the time being. | ||
-- XXX This lib should not depend on internal modules of streamly. | ||
parseCSVFromFile :: FilePath -> IO CSV | ||
parseCSVFromFile inFile = do | ||
src <- IO.openFile inFile ReadMode | ||
|
||
Stream.unfold Handle.read src -- SerialT IO Word8 | ||
& Unicode.decodeUtf8 -- SerialT IO Char | ||
& parseLines -- IO CSV | ||
|
||
where | ||
|
||
parseLine ls = | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
Stream.toList $ Stream.splitOn (== ',') Fold.toList $ Stream.fromList ls | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This won't work with quoted fields that include escaped commas. We had frame parsing in streamly for that use case, but I'm not sure if we have implemented it yet. |
||
|
||
parseLines strm = | ||
Stream.splitOn (== '\n') Fold.toList strm | ||
& Stream.mapM parseLine | ||
& Stream.toList | ||
|
||
------------------------------------------------------------------------------- | ||
|
||
data ReportType = TextReport | GraphicalChart | ||
|
@@ -939,17 +967,14 @@ prepareToReport inputFile Config{..} = do | |
Just dir -> createDirectoryIfMissing True dir | ||
-- We assume the dataset is not big and therefore take liberties to process | ||
-- in a non-streaming fashion. | ||
csvData <- parseCSVFromFile inputFile | ||
case csvData of | ||
Left e -> error $ show e | ||
Right csvlines -> do | ||
when (null csvlines) $ error $ "The input file [" | ||
++ show inputFile ++ "] is empty" | ||
let allFields = head csvlines | ||
fields = selectFields allFields | ||
filterSanity "selectFields" allFields fields | ||
let filt x = notElem (map toLower x) ["name", "iters"] | ||
return (csvlines, filter filt fields) | ||
csvlines <- parseCSVFromFile inputFile | ||
when (null csvlines) $ error $ "The input file [" | ||
++ show inputFile ++ "] is empty" | ||
let allFields = head csvlines | ||
fields = selectFields allFields | ||
filterSanity "selectFields" allFields fields | ||
let filt x = notElem (map toLower x) ["name", "iters"] | ||
return (csvlines, filter filt fields) | ||
|
||
-- Keep only those benchmarks that belong to the group. | ||
filterGroupBenchmarks :: [GroupMatrix] -> IO [GroupMatrix] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,7 @@ | ||
resolver: lts-16.11 | ||
resolver: lts-18.23 | ||
packages: | ||
- '.' | ||
extra-deps: | ||
- streamly-0.8.1.1 | ||
- unicode-data-0.2.0 | ||
- fusion-plugin-types-0.1.0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why would that be the case?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not truly streaming: I'm not using folds or parsers, which I believe would be cleaner and would make the pipeline more likely to fuse, resulting in better performance.