From 24a87d8d1f316fdfa6bb33c42b1153eec6dcbebc Mon Sep 17 00:00:00 2001 From: shikokuchuo <53399081+shikokuchuo@users.noreply.github.com> Date: Wed, 29 May 2024 14:31:39 +0100 Subject: [PATCH] add mirai_map comparison to vignette --- vignettes/mirai.Rmd | 137 +++++++++++++++++++++++---------------- vignettes/mirai.Rmd.orig | 16 ++++- 2 files changed, 96 insertions(+), 57 deletions(-) diff --git a/vignettes/mirai.Rmd b/vignettes/mirai.Rmd index ee405d62f..ffb637089 100644 --- a/vignettes/mirai.Rmd +++ b/vignettes/mirai.Rmd @@ -61,13 +61,13 @@ Upon completion, the 'mirai' resolves automatically to the evaluated result. ``` r m$data -#> [1] 56.65454 +#> [1] 58.49851 ``` Alternatively, to wait for and collect the result, use the `[]` method: ``` r m[] -#> [1] 56.65454 +#> [1] 58.49851 ``` For easy programmatic use of `mirai()`, '.expr' accepts a pre-constructed language object, and also a list of named arguments passed via '.args'. So, the following would be equivalent to the above: @@ -83,7 +83,7 @@ args <- list(mean = input$x, sd = input$y) m <- mirai(.expr = expr, .args = args) m[] -#> [1] 55.97453 +#> [1] 57.92717 ``` [« Back to ToC](#table-of-contents) @@ -163,10 +163,10 @@ for (i in 1:10) { #> iteration 2 successful #> iteration 3 successful #> iteration 4 successful +#> Error: random error #> iteration 5 successful #> iteration 6 successful #> iteration 7 successful -#> Error: random error #> iteration 8 successful #> iteration 9 successful #> iteration 10 successful @@ -201,12 +201,12 @@ status() #> #> $daemons #> i online instance assigned complete -#> abstract://cf97affccf81e9c5f127b002 1 1 1 0 0 -#> abstract://e27dd5968a15cb70d27a33e9 2 1 1 0 0 -#> abstract://958c88c6f17e9cdff59169e8 3 1 1 0 0 -#> abstract://593fa5fa90911cda90bb4bdb 4 1 1 0 0 -#> abstract://4b29729086ffa0e7b30fc235 5 1 1 0 0 -#> abstract://78c721725915d4ae0e756f87 6 1 1 0 0 +#> abstract://ac4ecf271e0a1a95751fd910 1 1 1 0 0 +#> abstract://71dd91be1e8def75aecf0ac5 2 1 1 0 0 +#> abstract://aa719b87bde161201a78904b 3 1 1 0 0 +#> abstract://d26d8b44875e1c040e034514 4 1 1 0 0 +#> abstract://1d67b760aeacdead396ed34d 5 1 1 0 0 +#> abstract://c508c8a686d0e3d0cf52a4b8 6 1 1 0 0 ``` The default `dispatcher = TRUE` creates a `dispatcher()` background process that connects to individual daemon processes on the local machine. This ensures that tasks are dispatched efficiently on a first-in first-out (FIFO) basis to daemons for processing. Tasks are queued at the dispatcher and sent to a daemon as soon as it can accept the task for immediate execution. @@ -235,7 +235,7 @@ status() #> [1] 6 #> #> $daemons -#> [1] "abstract://9397ae4459699159a3f71513" +#> [1] "abstract://1de9ebdcc50e7e95dea94065" ``` This implementation sends tasks immediately, and ensures that tasks are evenly-distributed amongst daemons. This means that optimal scheduling is not guaranteed as the duration of tasks cannot be known *a priori*. As an example, tasks could be queued at a daemon behind a long-running task, whilst other daemons are idle having already completed their tasks. @@ -261,14 +261,14 @@ By super-assignment, the conenction 'con' will be available in the global enviro ``` r m <- mirai(capture.output(str(con))) call_mirai(m)$data -#> [1] "Formal class 'SQLiteConnection' [package \"RSQLite\"] with 8 slots" -#> [2] " ..@ ptr : " -#> [3] " ..@ dbname : chr \"/tmp/Rtmp1gVzMY/file356315d6cc1a9\"" -#> [4] " ..@ loadable.extensions: logi TRUE" -#> [5] " ..@ flags : int 70" -#> [6] " ..@ vfs : chr \"\"" -#> [7] " ..@ ref : " -#> [8] " ..@ bigint : chr \"integer64\"" +#> [1] "Formal class 'SQLiteConnection' [package \"RSQLite\"] with 8 slots" +#> [2] " ..@ ptr : " +#> [3] " ..@ dbname : chr \"/tmp/RtmpTVJ1nr/filedd4d89f34d\"" +#> [4] " ..@ loadable.extensions: logi TRUE" +#> [5] " ..@ flags : int 70" +#> [6] " ..@ vfs : chr \"\"" +#> [7] " ..@ ref : " +#> [8] " ..@ bigint : chr \"integer64\"" #> [9] " ..@ extended_types : logi FALSE" ``` Disconnect from the database everywhere, and set the number of daemons to zero to reset. @@ -357,7 +357,7 @@ By specifying `dispatcher = FALSE`, remote daemons connect directly to the host ``` r daemons(url = host_url(), dispatcher = FALSE) -#> [1] "tcp://hostname:40513" +#> [1] "tcp://hostname:43735" ``` Note that above, calling `host_url()` without a port value uses the default of '0'. This is a wildcard value that will automatically cause a free ephemeral port to be assigned. The actual assigned port is provided in the return value of the call, or it may be queried at any time via `status()`. @@ -371,7 +371,7 @@ status() #> [1] 0 #> #> $daemons -#> [1] "tcp://hostname:40513" +#> [1] "tcp://hostname:43735" ``` To reset all connections and revert to default behaviour: @@ -441,10 +441,10 @@ daemons(n = 2, url = host_url()) launch_remote(1:2) #> [1] -#> Rscript -e "mirai::daemon('tcp://hostname:42581',rs=c(10407,-811844038,1837155955,10862872,-1331570631,-2117203290,1932457199))" +#> Rscript -e "mirai::daemon('tcp://hostname:39803',rs=c(10407,833267321,378827750,313097519,172204452,1224852565,1370982674))" #> #> [2] -#> Rscript -e "mirai::daemon('tcp://hostname:44973',rs=c(10407,-362965075,764908273,-70314687,164597619,1786023308,1932284711))" +#> Rscript -e "mirai::daemon('tcp://hostname:46119',rs=c(10407,-1355041871,2058095139,1498982995,98322317,751807949,1152247332))" daemons(0) #> [1] 0 @@ -472,37 +472,37 @@ The generated self-signed certificate is available via `launch_remote()`. This f ``` r launch_remote(1) #> [1] -#> Rscript -e "mirai::daemon('wss://hostname:43627/1',tls=c('-----BEGIN CERTIFICATE----- +#> Rscript -e "mirai::daemon('wss://hostname:42387/1',tls=c('-----BEGIN CERTIFICATE----- #> MIIFNzCCAx+gAwIBAgIBATANBgkqhkiG9w0BAQsFADAzMREwDwYDVQQDDAhrdW1h #> bW90bzERMA8GA1UECgwITmFub25leHQxCzAJBgNVBAYTAkpQMB4XDTAxMDEwMTAw #> MDAwMFoXDTMwMTIzMTIzNTk1OVowMzERMA8GA1UEAwwIa3VtYW1vdG8xETAPBgNV #> BAoMCE5hbm9uZXh0MQswCQYDVQQGEwJKUDCCAiIwDQYJKoZIhvcNAQEBBQADggIP -#> ADCCAgoCggIBAKT9AMDyEtdcQdGJ8YgDOUeJ0h7yg0vvbw801cru/VVQ9BHO1DA8 -#> s1y59iuo190cKlc/jJftcsW1IygMMl63V7x3TysqekV8Wh+o8J3ixREzgz++sNtS -#> U0B3UnY/bmikfaLlR7tYlkHWt1lF1ow6vaSLK/zAq80O3M94kQiDNoaKqTN8dZoV -#> 340Uiev2pRNheR91MHZdMRhb5ksk4CPpA3PSPzSN+4gh0T3VPlmwz386Tju+2Ang -#> hynzNZykP2OX8PcBFvg/Xzamy1IbZkjkSrg0wdpWppGZzN9irCoAAt1tSuciqolP -#> xKCR6stZyiYeVkI/fd60GTUFE/ty/GDmOTzPO+kfeOeF7yHpYtF2h3gaaOVc3ke4 -#> 9X0EfWpr8ld6R5gigf70kGupxMYOqW9eOTHd1w4J7p7jpn9xMygPbs0wkcvxzfB8 -#> zG4xCV61eQrqs/z1fppCWyXoJ7mVI4zRseXDLw/9TneIeKocfP1JR5veNbBlH8UB -#> 8StMjOi7f8IRhlYiZbdE9Qyl48HFGtBChLcKX87gMGapm3PqGqTVEpnnia4l3GlY -#> kHs9jW2p2ACBLUNileuDjE+NmFUYdEiMeKdkJCXr4kRqHtzekSx328F1gkn8FMx6 -#> 3l1mgKWGLsZuPtfNpI2R6z0QHHjUAa+9NzR9DiQYye82qq+ZIztI+wcnAgMBAAGj -#> VjBUMBIGA1UdEwEB/wQIMAYBAf8CAQAwHQYDVR0OBBYEFDDPrsVNJGCFgXYOWtCX -#> TOOsSyQxMB8GA1UdIwQYMBaAFDDPrsVNJGCFgXYOWtCXTOOsSyQxMA0GCSqGSIb3 -#> DQEBCwUAA4ICAQCP05ejn563CaxPTRJUz1WSTGUJ2i1egrCBvXOVRkYV3oysUGnk -#> uyVrXFWWMifQynzfmEuXuXV4qsWHFkk88aHsbfDGqqviYQJSlCpnq+8nBojG1ZaX -#> DH2lTWOVWrZN7CxILn6fS3zlY6On/+ZnUqcWVgeq9kGPAZ4eLs0eqSGs7VyQi++M -#> QHKXclZIeG/XE1Nv0H3/YBO6WkaP7K+ocICrWRnBhoqHKJsh7G2u07I7YY1iz4b+ -#> 8MxKb4KzZN3JbRKYIb2S2skjJUaRUCtsHbJK/NJMyJkqONJQ0x3Gx/aJtyTUGkBS -#> a+bdkkeqC6QrTimUb1gZzmEXRTdxX2OkW7F6zr7iSivpEGRh8hNjERkBWmD1ykP6 -#> Gd6YewSCWqADnDZwdmd4KqyywA0tobjjBy8jWyZs8szev4q7YZmLwNR/w+0/irkB -#> K0Oaj+Gxu/4pM5BVlh5pcex/fAKztyulq1fdE2rrOIuL22ANTBwaKARbBmJnkyFj -#> gH8ZE9Utu1tD8VGqzXV8X05Zd3qSgM05W0KhP4L9+7sueUArx+w4ObedVJEo2FKm -#> Df+q5npgbE56TreV2fKwM9tn2bpCUOFdZX+1lVVvFyYBqbGp0VA+iOGpFcsJtLyH -#> fsLHhtCUXvDo8XE+sQtUDrQfplSWbK60oE6kFwuR4squxS69MetIqCk0gA== +#> ADCCAgoCggIBAOWLBt74J+Ch7KleJSzh9Wd1B/P/kTB30N9GveCri1Q6ipYdXmHW +#> jqSSTcFFqOVSJkPXljOUmwc56obn3XNU53B3yJt+8WLm8tfAsS3Mio4BLFUYCmAV +#> CEyQvLc1D14pIGT2lEM8nUCL5owhrfPX48QD/YULW7mvBaGL2ZZo6Viy+mQE+f2w +#> nOKaRbJNofIrDibHcrjgWINvyy0e1WME2AoVjKSwe1TMg0osQPgB0iQ2K8LJfUkH +#> f15PzDxGb9BeQWIrPP+arsJdimamfZ67QFL/pq7iKKctWDj1GbhEVfrStz23R+8V +#> k1WY663hCx9PUV4TSnWACSdbvJWbHM4lY3IYCKtMrD3SJgD5cvcVh5uH9ER/TY2P +#> nGcCNdok6s1yJ9QMvldsgTpISnUQfYi+FzYB26/mrz8WIRQwPcVgjBFtSNQF5c/S +#> upoK+Og3OfbB+vNrRnpzgD560IHqTHHob1Wty0TZBMnWiUEJzkW3OUuvtOBPOepr +#> Pqa64D9K12koBfCA3oodzDH5ck24PIWzfu9bI/jFMPoJEh7W63Qt30kFBin716xJ +#> +X6y8TTcv7crar9EQKU1mx7I2XviDNuCZdhLznn/WpqQ1gEU8G9daq7Z4uVmPM74 +#> ZzDWA43YjobFIlpbiu6RfB2chqVo6AMD4DfhPmExuRnlooA9jTOGO1SPAgMBAAGj +#> VjBUMBIGA1UdEwEB/wQIMAYBAf8CAQAwHQYDVR0OBBYEFN8UmW5yHIXcrtif9MsS +#> 6ihOBQdlMB8GA1UdIwQYMBaAFN8UmW5yHIXcrtif9MsS6ihOBQdlMA0GCSqGSIb3 +#> DQEBCwUAA4ICAQDBQFMLPF/u1uEZ+pXh7kYLHtq8Krrn0556xtF2Z2n1dhkAfmI4 +#> RFM2yROTZqzlPt2CNNOekeCEecLEv96jMC14SXWHOhMaaqkyzJHtb2fhmo+k9n3+ +#> 4X6Nu2HaajVqOmTkAAPeraLzarUzHtHVxnilRuYpwT6ViZ60qa4pGZ/KqCi1xCnZ +#> A+9bejOx/+8mdyKZYdmSyvgbki6tuINHR4VHMVxoAAcxTX6wXNLl+FFVOb/p8MK1 +#> fqtMHxfGKSmGChPYcCn6LCnXMxtEUyCd2o5MwOxq9ptXzK1mVsmvmKVNb8UMs8BN +#> rQZEn2zQV689ZVzs6bGB0v2BWrqGpq94fVB8fFsfwCPITa+tVV7fIwvJhMO2PJYa +#> kQQR+RCYuGHdbIPPaWrZFNoGbmycNso5UV9YbE75/pQHWPSRKFuC39lL/Mglqftc +#> PRgL/VAHHWFi2NHkUNA9tujgJrzxspQWNk6bZDpEA252VhI08OnppBOiCs5pdqZG +#> Cp2a3+3av//5aHmA+RsL8azlxRGdnATMyBIDeHMLLL+3YBdEiomlG8NLaR3cjbv2 +#> hgNDXK7INv9Is6GGxCN/fZ1IeaAJ7KX43IsoGyGa9QEVC7dy9dUCd457t/0dW6pO +#> 6aoTKoORZR8WNMvstaHbdU2Fe/Elm7wOU5rfrVACKQpCc6kNezMiVqn6yA== #> -----END CERTIFICATE----- -#> ',''),rs=c(10407,-1540989762,-1037309401,-1573553092,408913933,-428500118,544986595))" +#> ',''),rs=c(10407,-2021112118,-1165823549,-219840472,-1452169207,496305206,1170283839))" ``` The printed value may be deployed directly on a remote machine. @@ -729,7 +729,32 @@ Key advantages of `mirai_map()`: - The '.promise' argument allows a promise to registered against each mirai, which can be used to perform side-effects. - Returns evaluation errors as 'miraiError' or 'errorValue' as the case may be, rather than causing the entire operation to fail. This allows more efficient recovery from partial failure. -- Does not rely on a 'chunking' algorithm that attempts to split work into batches according to the number of available daemons, as implemented for example in the `parallel` package. Chunking cannot take into account varying or unpredictable compute times over the indices. It can be optimal to rely on `mirai` for scheduling instead. +- Does not rely on a 'chunking' algorithm that attempts to split work into batches according to the number of available daemons, as implemented for example in the `parallel` package. Chunking cannot take into account varying or unpredictable compute times over the indices. It can be optimal to rely on `mirai` for scheduling instead. This is demonstrated in the example below. + + +``` r +library(mirai) +library(parallel) + +daemons(4, retry = FALSE) +#> [1] 4 + +cl <- make_cluster(4) + +vec <- c(1, 1, 4, 4, 1, 1, 1, 1) + +system.time(mirai_map(vec, Sys.sleep)[]) +#> user system elapsed +#> 0.005 0.005 4.008 + +system.time(parLapplyLB(cl, vec, Sys.sleep)) +#> user system elapsed +#> 0.005 0.008 5.009 + +system.time(parLapply(cl, vec, Sys.sleep)) +#> user system elapsed +#> 0.002 0.008 8.009 +``` `.args` is used to specify further constant arguments to `.f` - the 'mean' and 'sd' in the example below: @@ -740,13 +765,13 @@ with( mirai_map(1:3, rnorm, .args = list(mean = 20, sd = 2))[] ) #> [[1]] -#> [1] 18.37725 +#> [1] 18.4346 #> #> [[2]] -#> [1] 23.25623 21.28882 +#> [1] 16.24874 20.58363 #> #> [[3]] -#> [1] 18.78728 20.12737 25.94307 +#> [1] 18.95603 19.46307 18.74855 ``` Use `...` to further specify objects referenced but not defined in `.f` - the 'do' in the anonymous function below: @@ -765,13 +790,13 @@ ml ml[] #> $a -#> [1] "5b" +#> [1] "41" #> #> $b -#> [1] f7 a8 +#> [1] 14 fe #> #> $c -#> [1] "e64a70" +#> [1] "7f1612" ``` Use of `mirai_map()` assumes that `daemons()` have previously been set. If not then one (non-dispatcher) daemon is set to allow the function to proceed. This ensures safe behaviour, but is unlikely to be optimal, so please ensure daemons are set beforehand. diff --git a/vignettes/mirai.Rmd.orig b/vignettes/mirai.Rmd.orig index 598d4740e..c588d576f 100644 --- a/vignettes/mirai.Rmd.orig +++ b/vignettes/mirai.Rmd.orig @@ -551,7 +551,21 @@ Key advantages of `mirai_map()`: - The '.promise' argument allows a promise to registered against each mirai, which can be used to perform side-effects. - Returns evaluation errors as 'miraiError' or 'errorValue' as the case may be, rather than causing the entire operation to fail. This allows more efficient recovery from partial failure. -- Does not rely on a 'chunking' algorithm that attempts to split work into batches according to the number of available daemons, as implemented for example in the `parallel` package. Chunking cannot take into account varying or unpredictable compute times over the indices. It can be optimal to rely on `mirai` for scheduling instead. +- Does not rely on a 'chunking' algorithm that attempts to split work into batches according to the number of available daemons, as implemented for example in the `parallel` package. Chunking cannot take into account varying or unpredictable compute times over the indices. It can be optimal to rely on `mirai` for scheduling instead. This is demonstrated in the example below. + +```{r chunk} +library(mirai) +library(parallel) + +daemons(4, retry = FALSE) +cl <- make_cluster(4) + +vec <- c(1, 1, 4, 4, 1, 1, 1, 1) + +system.time(mirai_map(vec, Sys.sleep)[]) +system.time(parLapplyLB(cl, vec, Sys.sleep)) +system.time(parLapply(cl, vec, Sys.sleep)) +``` `.args` is used to specify further constant arguments to `.f` - the 'mean' and 'sd' in the example below: