From f10d1297c11e369a30f00a6c2556836422ba3138 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 1 Dec 2023 09:28:25 +0000 Subject: [PATCH 01/30] add some test data --- tests/test_data/config_demo.yaml | 30 ++++++++++++++++++++++++++++++ tests/test_data/iow-bus-gtfs.zip | Bin 0 -> 26630 bytes 2 files changed, 30 insertions(+) create mode 100644 tests/test_data/config_demo.yaml create mode 100755 tests/test_data/iow-bus-gtfs.zip diff --git a/tests/test_data/config_demo.yaml b/tests/test_data/config_demo.yaml new file mode 100644 index 0000000..66db500 --- /dev/null +++ b/tests/test_data/config_demo.yaml @@ -0,0 +1,30 @@ +paths: + path_gtfs: /mnt/efs/otp/gtfs_transfers/iow-bus-gtfs.zip + path_outputs: /mnt/efs/otp/gtfs_transfers/skims_iow + path_zone_centroids: /mnt/efs/zones/msoa_centroids.geojson # path to zone centroids to skim + + +settings: + calendar_date : 20190515 # yyyymmdd | Date for filtering the GTFS file. + start_s : 32400 # sec | Start time of the journey. + end_s : 41400 # sec | Max end time of a journey. 
+ walk_distance_threshold : 2000 # m | Max walk distance in a leg + walk_speed : 4.5 # kph | Walking speed + crows_fly_factor : 1.3 # Conversion factor from euclidean to routed distances + max_transfer_time : 1800 # Max combined time of walking and waiting (sec) + k : 500 # max nearest neighbours when calculating distances + max_wait : 1800 # sec | Max wait time at a stop + bounding_box : null + +steps: + - preprocessing + - connectors + - graph + +# Settings for AWS batch (Optional) +aws_job_definition: + image: "815306348607.dkr.ecr.eu-west-1.amazonaws.com/ukpop:latest" + memory: 32000 + vcpus: 4 + job_definition_name: "gtfs-ukpop11" + job_queue: "general_purpose_queue" \ No newline at end of file diff --git a/tests/test_data/iow-bus-gtfs.zip b/tests/test_data/iow-bus-gtfs.zip new file mode 100755 index 0000000000000000000000000000000000000000..1bcb7ae093b0d7d6cbdc8f95f9727cef26ac61e7 GIT binary patch literal 26630 zcmb?@by$>5^eCNzONpRJihv?Wh%_uMEg~hM(%s!5pu~boEad_&DJYFJqM%4GAT2E* z-F;^l_4~ei@9%l;ANP-$eczcgr_Y%)@4mYl%D8ycSXfv@SoU%VwFy=`-birAgpGwo z0zMyEnAtz^y6Nias&!XIy+gSus!K()S2MU|lhQ?tqP92lGF}cFzfd`s~o|e7coA$3I4y4)xuLm-lIt zYR#-PRtrl;s8sdKi3|c|I|7bhUD@H+J=MG-Dwn`GfV$j)HMJnohuml(=Vhbart2 zjV5lDx=LxUK;^qs=9S6Rzji3A7KxfRwNR^1r8hn<*eAxa zB~}fR$UW4JyR$%z_!)7P)77Nq8cT?|^Z8WU_tF==(v8~trl&9UzI{-WmSkBn_`~M$ z=eM)OQ3**BYh?O{af5;gat1oBfTb$D_H~~}8uSx&Cb%2%9B(xriuaY3WFiu^-#7{l z`{m2IwS*Ve@&%|Cc8kCk=q5B`4<7Lv7FRubsQ#GT*4AG^QqW}cp7-Wsy<0bt&>Y35 zHi-$BzCTg-8)7==KXEh@&3JDeHBDuL{rJ=B-BF>$U#szjRK_)F!w2EUQk-*lYxLU- zML51)PGh)2#`Cf#fb+$@2eIFZdEWA;OWc=X}W35(6HS+CEfz=>09OKyN zuN$`&Jxi6l4?ZUzz$O1{7&YRu2X7Rxu!@ecvB-hBU0fX;ja{wm&P@F(t*Pip%zkzk z_nWfkv9WHvw)>djgBO)V^e>DAcwd-`q=}ureCwhZS#NeP`aGHc*pZ<@!Y7%Yc2U`-k9eB4A(gbR)Lmc%s4oWZU@kfDw#Ko-C~f%$}}gobGve8Vv-T zY@Z$v8lNr)WSn*_tTCRRR0bTjZb+WC9vT`4By1mR4P>|(67VOwsoEUtTJ4v{)SDR) zY;M(^Y^u#}7SEq1c(YuOBg{;9&-+|=WP8Y$RNMIAtCz#9Sww!N@%9c6%lz~2s;uLZ zc{f9pGJlQ8U7i-a?I$}TVw8n)rC433H#ktTBvkd&SlTb14|{53EBR_L(97-P_PGkv 
zRAb2{AG&XIzPHWWh34T2O)qN#80W((9!DHoh$b}MpZZYV;kxO5L~VqN+-mmT9x?a+ zu*m;*F4=y`pQ3m@d7vxgM!U&ZLx$q#-#Y}qs(*Tt$ti2aEhKKZZI?f1$5lP{YGENZ zHgA;_YK1~eL*?n*n%(5@Q(!*W+R=9!7)glcyA{v z$7kC_{6mh<2j-h@NXY6yOl9@wYXbddyxZJf87($~Ey}BlX5$5}$4klwzUL_~d^ad| zLt>4%X>G({-SOY>p;N&&8hpV;u(!pRLZ*dlacM^A2MK;!UL%ddB2l(By{cAFL5=Z~ zEXh+;**RpzB#ygF*j5k1j92ibvHAk9RKGz91!DW-$Y}5QX9EkP_re*E1g>rlN}jL3 z5t6q$-cR^tOW()<2AI|ty4GhWd}>-2h=j~OyYgkr;G`CHkpk#Kr{QX!lU~NX+v=Qy zHs`MPxiAXFFvCSkMhugS+IhTEEEfff26>X}+Yi;vMNOha4zFzfw{jijv-^rJ9K7BQU{mVU;e zt^I4#Pcr(br>e%sY!8m6Klj|#id2p*UD(`Mz!Nisi%96Vt7bURL+z{eFBK1+`>r1R zpxYUK%?atMe5CV>(ebNn0`zZ-Js@`m-6MbK(BoD1>$I zc?jJ9KF={0VnJd1pPMhT+r6>V)9&OJ5NrBooG3s$3!MwYQcC7ZAWPCsu^#CJU^ty=;L#9XxaK-km4Q{BJ!s*a{Lgm+&>!3VU`a> zd^UK2;xBL6`u$k*mtQi#r}{@WuEP|kl>CXPoI5e2a+NK39)l8o4@p)q1}Yb z$!*;J!{N75#YU8cTUf}L>#p99$D$@AJ&WNr3!C}sg99HlBu7;<0!FP33AAwAv@th) z*!5Qh)HD0fd-RQIPkbM)d;iwIDg|4=pv4#7+qZBq_;G71`N^7*(KP><{twCC=-03Z zT3MqNOLihE>Vt;a!GO0*K1t~k?fSI@DiA9h=ADb)-hFI}s&)!CUSz(g50P1 z(%Zm-XQ2 z;YH06C-Dfo-rS4NLRNSk#mhjoYrgM}`hH=4Kdl@Chk{8qa^EkSTE(~gzhJ*#&?@1V zKD??x%tfKwPvs$wdz$gLh|L!k6YFI@mYBMp76U-#?Dfx zU;wnZcd=mx)-ASIap9xy7vYi{Zg)<1J5dn^2NXq(+c(|VgSu#LUfIoVyqcMor*mTso=vuA4Ky?K_}%qsH#fJ zKUSGhCx9(RhZ;fEG6UyY$RLBhLu#T9%X4h^)fRN?xV1B{~oG zb?atv!=COfckeHk4C(JG&j-x`+4Fd ztY41mjaap2U!Gnw!Ql<`#Bs+_!dFqX+u|_6;xJi|Zv1fLOwMk$TPcpj5z#eyxe0p0oz6j5tx@3eFgs<7QRXc)kXYsMS9+^?|{`A>g#yt z@PHDmifMQuDMlZu0QtfMch_zl=^91*YVHlU>qcSq-NufBAbN@1?-SRS|ZMHx2=GwyiVA z9_MLnwm!)=o(k3jq&PO!>*%^)BuQ9>%04&*fj9|UY0LFGi>qPZ9${x;yyiLj5y5{xPo^#=IrKyq2_71DKyhX6~;)1GQPLYB4B`L}FSDISJZ`*7}-}{vFm*eXWJF{-s(5cu3 zN1jH1AE@9%@`ou-5Bgo*%Yz|5r(6&clJT9J1r_-8NN1;>s-Cmw;Ma55k z`AN;P%^2Ka_w$XZvSxjQZgvF^MM`b0%L1UievXj|I>c96p>9yD{>B@gg^K#v= ztE%96(F2Db8tk!XE&R2YR|AArHXNGXVhSI}sbu z*W*HXN{1_ztJJ*6fi*^0@87RkwRn52zef}61F7$EQPx`jaZy257LM%*UlQe7bPUex zn0(GaFpbg_+$PBa`Z#|B%wuh_&K~4uIp)OV=W+9jaLM|E#Hq?FG8|EMH{+^0pczg8 z=lfn56Aae>S1Uhf7C0hUco@fG0#db zFZ@{GAX?0YUt2GTA!#ap=$`yp6$m0Z_5+y(6=2XtD?hpotgcCj6(I>0$w!XA>cOCU 
z#VbORsc77Xa=*xcj|hjEsw=#@lhE`_Ut%#AayfUXPm?*H5?B%)IvSB>eCu=$kZwgjO-CbV>+LN9XWo`A3L1JRK8sAFB#8AGDF;`b0)=((T z$s$=3jm^r!+cH?lis?-gfg3VtvowGmfISL`Q0zg;&5u7CDF)E#2K-;<*V(E^&-`?5FR(&RV?TbHu=KRhprrD2A&k z)-F9Jey@IPY%!NoRiPAkoh-U%%gq|<*Q7-C`~qlM{FWCuh{azu_w~e_$^0{y@^Wr^ z3>VNg>I^EE^5Hm8Zse*0?&w_BN)Ly-1WBOC2k~zTlXn@$#2BthF^r|pWv#vSo>muu zcb-(GEB3!^N*{MA9HZ?I`jO}jhYDz0abJm8E_>h#)OqS z?qlPs8CP9|NcVa9cQuB7#$gliT>> zVck!!acfFW9woK`>@VJJkFN~&7v2doE!XK$N)+B=eY<=)EcKUm{e=n%^+Mw-0!%3^ zBZ1leyB^_={FU(DL%V(L@f(>6EWOltcGcqV4d+4x%C8p-M$hiFxO2Yj{smJudLc*7 zY1%Cox9q00wET$So(_Vu<6^>~Yny?=$?dogWD{C3Iw6WyT{rxEmvB-hxpWZzbSlz! zpZD`T_oZ|}ky*N*U-BI_t5h6-s#Pq-6_s*4W8hLUo+?Y8tDH6ZrF<=@-DxSg_|v-PVtqGUD~b@J3f5JC-wycw?|gz z#~I}g)M~R`OVj+*eS@ENqn^lO^GfDA+R&A4bTc9C_)*ZJx}Q-y0^`x z2E~;P;La4gDlpZ7Pm4xVTRv+KWsNA@ccOwnwJj*~b)lkXQn5t+x{?6!m_}aT$)Q>y zCWH7z3ZW75yg$Yd^MSIqB_uS}diSsiIGRJVkVnS`XB-Yk-UdL&_0>R5?YNv! zhy=1+D|+J_>rjARViJrHrE6R1;K4(Yte&4u7*$(CiCsqn8SBA=XKo=DVB9<*LvDMpZlaPe2r_|LE_MWktUCG7=c#pWpyR~SYnETN zRe%}{iZ^bne`6Dn&98^`*TdMsQq?%X8;nin9Z$Sb*LQ&4wC+~vryh0zhWK4G-tz0M zp-7p0quE;sitIK7tAILSDs9*OMhIsIFVI>_#VCOXnw3}!M4k>E6}@zRMXdzzhgukL z74br5H2JD~%h#YUx`{G*oXYo8l`QEus!W;yfs!|7yv0VBDe0Zm4)Ddq!79@sV-+n? 
zClyP)o>-?H4YQPPS4Hg^V89a|pa*N2&%hJ8-xeRb03S#g82sqo3KY8WpvC=w8zFqY z((oFW00Tu~x2x$1*hHzGaK>Ltre+Fr-W-4V*mkfh8WX zwq=W`5rTrv2IXWU@bkb8!I{QFD~@;zng$*K9p6l7K&?%#5D9Z=AKO# zL9yx?M$o1bCJ#INL|^}DbF%E$1%hR{1RQ@8I3rTn-579&=nq1zn+}RG43ny3ud0+{M2hU`H0qLpVvycG5Mk_5oDdiuc_vO+l;d7Gj!GccNW3YhQDxwll zOqOdp$l{{SL`wIj{c!d0Q?nEao{ioXMo_RQg_&<(z$a}Y={QL3cE!sHUm65qU3$C8 zfcl2yZZip4$BTE*gJ(SqvT17p7G)kQaHzHi96Sm$cLTJWlTnKxz-qIhrT*%cEh*?6 z88nh7mcJW4%5Ya|F zl&FKM`fVMJ!v(12U4DBThoE1>&M$6_4UUsELluWJHVLH3?e^DG0c-+*U3o<2g&S9h zKTs<%PZn&6AB7Y+t=PE`k)W~)H3MsY2k!=f(>P2hH}5g;99i)F88U0@Ys2N0Qe_jR z9WNaKA9>GwyDqhyM%PNK+C>=pjKDV6RL*h1gGUF(aeA;GNjy0%0Z83Hi2IzT3#BR~ zn~_2GFEH7Ag+NlZk@GyF3{6~6?~@YN7G)CVh5dFlg?Etn^oC!qc%;nWt4s!`@p9rW zWB1U#V=>|sA%XeAS&mYZpXowGO;APm`d_X@q|8J6C4I~{LcceX5NQ z>l&af=I|WsBJrrCEGQ~`3WGkZM%fty}n!uv}7o~&*OkqVzX&C>(oXt89xLDrma6eeaC1F>Z^p;V;TOtG5QizD6NC;%i z@oozLI_cDp+XT!k+XPPJgO@~D^c{Y}Eb9<5ncD=?&~1YCn88bUkzj&N-vPpAgqaL# zldY46v32AOG0TiIDb`|IH*=4-Ot|b4S#angS(-=L-@y>3b#HXTjf=#5qOWDb_W-@$ zsL#;fcYvIF)?rWs=8XUW&pPI}^Kr%e+fQSN1^{Qe4iJ`y4nHGI>JY3)-XV9wm&l;9 zi)5|<&HoeIXpDavnhsjVr{s|Xm+*-DFNv&LVkXZ7p8~@Fw>>3aUm}~jTr31h&J_ZA zT*y^1cbw_uOte2Demszf%f!wc#c5d<4TEqypo=lz97twl5gfVxiLwv`jrw==z$}@y zyHkDFC6VBQPhUSC5TFaP%6e>I9qaVaV>N6GLw(ngCd&pbqPRZ3By5Y6bwR(WmO~*quY}tU?+Nz~Xc$2H0)E<7gkOAOnBBOw$ zX?wegdyrq-r3hQV#+l*b=N#1Pq#=eorXH;00ACMn?}~hFKn^5yiA5*}lSig}bFiAd zL*Un(57n5*AJSG=FI@`!`K;UET-_{L`L_F=r|z=6m_|YogJ7VDLNH9WYB-U%YB00h zx}W;-RvByNtumRjb|0p3j%L;K91YtQ8BsQR8T`oBZt}>_8Z1<|?!(T8e-B}XrJ=XV zHe)_(NKf6`;wKm`zP_aXbGR;AY1e7_SD7Hf+XH&a* z1OmL6MOT+~2>?D$#r}km#h;0^8>4+&`UG0Vk7*e%A1zy0?YA5FVwpP71a|(qGO%Cu z+>dpsM>dr0w0Fd>|wR7f1Kv9Byk4dvR(bU z6BVMrrb3>wExUqDD#22FQ6WUSiZxSO7v>3BfmuQ>uw4nok0gA=LiKm(Y?wM!boFc+ z=o|wgJ%s_OhgCD(P!uEbh+qu;Bz(3SOjXH1rFD;{F(7E9k>BtpUHy;7ww;!DocvBi zkBi@sHnYWK+~&Xkf>vpDhtSi2ZCr8|JX(s}nYMab<^7oM+fD0?#L*jZYsg%ssjPGJ zg85gUm5D_o*8O5~6Rq4IyGf~5G0NPM9xM2;E3Af0dicTotX9g(sGKP zx|*uE&IsO>Ygu!VY&R-uj9-ICd+MvtjOoh8VRfA^N36CpT$g5W8=7;FOndk$Bkl7R 
zD|U1$a>ExZ#%>rV>`u0C>}Qme&u^6|CKMq#+s3(UD{3~Y_1x%G{2q-sNDkkWY}m=f zwQ_IkzdJI&LF3Wj_ldj4HiPj%EF-`V9QurOsJ~Q9cV)4H2n0%s6oEiA-cFO zNzYbGcZZFAKI)Ofdz7PgN^7!3QRe(Y?g=BjEtb}eH6Ka9g%;T5-F9DM-T)ZuBz1Wu%ehM zZbL-1h`|LHQpeg_k{&^@ZoN2~JbX<)SNihs#`+^V#D(4KrXK90(HhTvw08*+_A7&r zzk;vOccL{y+MEx+d>dYYaw~pmH%V_(&HZ#pLXhU=JrPPj9&tO3v(kmiTDquhC^>jE zlwQ#eAbzHcAP9CipO)7%Mj{9^`~XKX2_Xoa1>9D?-CVnfAlR@6Kn#yVKQId`WmxdP z`amiO8%0&~<5v?4!g|jx%E1GG7!BN>*DxXV+@Sk(Y~Z;6R)e(JBMO78xw;aBN+^BX z_@8w$pGj5gp3zuK-(j-)+8#i)_>?+&WhZOJFgJ+pWgZGO*b8_;Q>hUeA`*mU3Yn8A zdLRhyrHFVS3B<0Wq>NP4M71cu1-YgW8uth0kzoKHIT}VEaeLyjl>Q2I?3o}7%G$k+ z1I5IFaxl)7c2ueNLl8(K{12gI!O-KKF!~k*cL24ulS7YzYSks6*8G+8(6Ldu+=+UP zknW9q9#rtl+H-S}qoMk>1`j;QLBL(r1!w-3@tx|WLuO8CB=ar#2tE50MJ7b z{)D0QL}3REkj1AF8V$HmvQX$TIni&}VB2SRpk%Tk!8|Cb{BxAXx%iW!bEJY0?s&k| z86ZsN^K(GiQR)CX2FvUjqqSyIs=v1XXBu+cI}H~6o3gO%m@mRUf(XM**?zr7hRy0g z#%hM*SJhnUcX!?$K*?}$-}ArX1)peFiUqDzyTuxL4aEei&Me?Y1>yO`9;~P-qyA&* z+!WFNYW_doR1eIU@zjgjdfon2#kOAVWXHX6dWf~OzMeA~5GvQjg6lER`N=p8NaVg? zZJZkEw^ZX} z3N#G|gd1rlC&m-d{T7NE$nmSUzg`0up^*c81Jeu1hKk^$edBMh51UX+t3vCf=k*T| zS6m@2wARvlF&JY5w7?_K{S0h<3Xr`FtJe_9wshSM`!^1H#hyzp{~h@)w`{JKRBn*v zrXRxpRIiBWyle+K9Y$2N|3BfwL_r!RB07P;-zl4og>tWOdl&r66MQl?EI{P^yzLG= zl@mN*qN*k?_pGdfWrWd>Q!iddI|8Hw8}0U;pwDbL4}23OkzwGpm}yLp4upA(7XJ36 zC7e^heaJ|Fit_jr(H34!0#@fh31ifV?#bzR(B6l=0Bm?g&7&Ia^=_C*LiZpk%mGvX zdqf7+0i5KFR?l1wG;L6_y?mH10+^dfkeI~HDGUzGjqbxfR6$jPp@t6;?7-X^q55MO zb7yH3fQ-5dB?IQ3A`y(dQJEozc#u5MxH0B20FI@RTluf7H;(bvI((3cSy2B>BQWd!he$ue zfB_)@Vgvm^$Q6xqFWnRPfXRjc8}M{*(tnJ-eF1`YEHo1!@dHp6C>jW)eA>mM^#ZQH z`Mw_reE9{sr(RqHDj;#&{-4u*0S}6)WVHUj={~m$O6EY~Mn+W5#2qn!@!r9|{hwT4 zgmHb)gX{}H(eC)C|GOvtAAxO%*%Q$H|1GdT5TJ!d_kRNWpOL?TZSy;@B?4X@ZnF}m zgL6cdMu>?P3U$Z_g4^O!F8GpuQ9hdn`WCs9MPUsQTa#5?rUF$3y8JuePE=l1`Sa~R zCB)N1(p!qEb$$TAz0Pm7kxK5a5y7UcJ(PQ#`)2N0`EX9m`s*^-3Sk#y`>BCzkj3Gu zqwcqWjT#RD-XN+0`B?A6u=rZ2Vvt6`5dIaAmOXz&!a?ak-==&p3MPooi)mtZ01(~Q zB*3$!Z_8Zd<`$+vS%jJ9b|Qo|EdYoA(KJZFfdGXKn8S4a?iB3iZGo6t%|hw%_~tsnH}--=Dp{ghhdqBTT8IQAWDHL3 
zWc{oB`q@ZT-?FR5ZbU5I0<|VzwxCj-mZCx0yHk~q+0aY?Jf`Tf za<#B_t%Q=9PBl`oB*}Z2(|Sf};3nT$!)yB%bWN5wdPPOr9!i$o0@TLl3#26@If&wS4rhp)&sVZSp7QI!`y`qI;aU_Im@IUN>=xP{8tuxbFYU9qhMU*-vPe& zvGi@M> z2;b5CXc#&?p`iNsF%Tlo54FfJq@W6Nz#o}7P-s$TJ(>@M23m&X?+Rq5=8WhJPlCw+ zBp_v=RUp8z2dwv%3FI{xT0*MB+(@7!>)zJqhv=;oeSOydZvtgl zWUDM^u}dR%b~Ru8NZJDZnP_K1{kGEc@lFYAxAV(cPr>&6f7nPBjs5{!Z7kSDMBe@B z+gbpF)QgR0YzBS9aDd*@F&p~@G32UZdU*p^^{(Eyfnt*@c9U(uG&H;MTXr=$||JIi~JDSgZY#GrTFM9S?X{pwu+A1|W9z*HN=#tZ(u`PG=6$ z0RE<9K@Vx<;)0&K?w0zW>$4Fc6)+so#aW%?CI1yzjMDa5Lo8g@mr*dH z>?w$+h}A51&Fon#UtM(V|EEBb0miXYUI)@2-^fq^xOG6C!0EfS^f5%zf zd#8l0=fE()c5&1oQDuYL4-i2omCO#0UCqR!^%3n{4-hsQnBGxk1=tZ9s;|1N0Je4x zR1L`G&&rPU(4wu5fp{ujJYhmP=4h;ffcw6^WBA|7?*vo%t)5kW;!G$q3dA_3c&P(t zT<|mp9HBvdb{?JewyywB;#qJ3DMBN67gH#q*ZouZq0grP=FjN{jp1x9jAa^B$>2!< zIEtJdcc~WvEpQa@0aearbhLq!)4!;IjU02lKm(!Ee4^z1>VtMrF8p^FziW^NiXdx9 zu7}524TQOf1S25rzu)3s(P>*z`PZ>uZ*@flQvrce(D>k=3fK=nq*JWU0aSysDd2Gz zH~=x}%>2S^&3M}@o#+Y(f~pZctAJ`;1;Jx~C>eU@7fviV--@lxFmU(@$&!nC5#F7U z(g0Nc0HCX=zYheA4=|_THx6hGy>a-x4p+La=Il`mcq#z60_R{JKg=Oec>z;>>ty znp%yYTUwiMeD4x!YGTLFSU$PmwQ0AH%Wu0wV;8jd?xf~mb%!#uMyjHH_3#nfZrk-1 z8hg=An;CpW4oh_$Enc&og@1G{lM(m{#YV7H)$+IEINBD2;`NJODWV_G=C(MPg*Jjs zhU~~>jhGIuNXQ$l-vlp_O|bHSFTO%HSa~3!Bb`TAyn>iU-RE}ONN2heYT_!HwGX=^ zYu5$`yJKsEA5K>AHz*i^x;dSN=ZFGu;4_1tsj#vaM;k$YUUGw^`PNT`aVjqn=lR4o z(rk_?c_Z*Rd%lhI)bMz$&48z2HAi`rwHU&^!7jNG-2KFUw2d@4PG}$Ss$4uNX9S6= z{SsTtH0~YU+BPzJ^$e=?(HRsz3lGHaJR@tdXQCb5x79dWB31_yAWBp%C&Vj=cLOOk zgYRtFKGrq@%lG>!YqYmy^Aqr@KcgDdcD)VRrZp2LdA7p)=`%*Jpr31Nti?>1L;PpN z<8t`#`$yJFJ!7$-!M{i26;r!5(LWJgTMV@AgblPU`@D7tFt#;HiB{oEhyM)zdasQq zP&}){4l7SYrP1`QjoTiPXHf6XpFst+k;+bdY;C(fj{dn8P*rnV3ND6katzCexm^MgCI5()hCc7{zBCb4}R4`%^5j$$TPSpCE(Mvts zaz-1C@gB7kuwQZ!^>dzv;3^&x&lDuo?#^#QM+WUE!|b{0EZ606$zEIEvXc=Z+7dNe70 zGMegh_z8jUP*x0f#hNYgCDF_C#KrS+wH0}oO>8InS{jlyXWmhjd$;w;Xcnd}BCzdd zoA%h}Nyat@;;@o!9Kq79^P*M04)q6s)R z(0O#*UNpO?-NgS=Ww52(=%q@A=Jo@EHV@H7!CeWrFv3@-{h2X4 zeUPts@m``6DVuV26JwaF 
zbwE2{^mYr+mizS15p36WalvntB<_{x#Y)Hds*cX8u$l{IqZn$3(--hs?RHwZiDE{6 z#Tr0F3|(67SWQRg8msU(w)c9CKXb!wywC=ihqk!7rZ1dat`z3A$Ik;6nP}yPy#2gC z{Q#2S0$2~7zD@Y~Et7A}k<$pIK_z$EUEJeXZT8FE&<)cMv>h+ndF>^4TS}%BQ^VP~ zL-v;xb9}@~*(tgTm0I|1DA&KiWyMMfZoSZ>N^UERR52Zp%}f?NzQ$)G*PILvVBhrP z9L-ftSxO(UrRw1(e<>W~r_1!9N_&FDeK~NpguG8EY!t`g{r!WW%6Nao9xXZ#XGcwd@r6C?{~f8 z`0V0z63El0Hb@mh2)bXvoWqr{1c@QRtaTRt_Ow^6=TI`kjB|~UJsN~Da495K7swY#-T98 zW|O3%G{VMv{1_h+aU*lO7|Z*JGk%1GX+5SG3x}^1he$C^7aPyi93KKprja6xEXE3M zFz0vUPt4^OxV%Hj|NJRlBo93$Ta_iz3AhRTc7q-)vs45x+90Ts9}0k_JrIyelAONF z$*jfl&~q*APvgZ28+@Re4BKF0zwk9ANtfgNp(34?=G9fW z@D-Q%=1bbuJ*XEGKk01H*-)|`$9Yq8gs;pL>Fi{&9J{10ZnB;MkD{+#9aB^J_F9IX z0hqTFhb5P92e%9kWVFo;do%IdECpFIw-FDZ=L4>2Fmk@h5v3ScCxJGUR5#J%g={Yp z+CZodl>p&l|91q4iQM!MHW!I@Dus(B!S+uj~s4tLrcHBUDmRP+5hSmkMbn-?@dj zS=)%3t64K=(nVh?jQm-WH*cbP8u>#1-pEf8&C&-f`XE$`^&A_u3>jCiQ97r!`?K)+VUXDAgNu>kcCffm5EHel4!rep~l+^kQUquZ3 zBPW`WX9ryNojKVqAc($hL9Zw@e>)C{VAw`8^1HztF(P1?kpr^JJ^n}jKRVzsEdt3b z(_EEJzZ7|@(s59=D8Vmi!wkS7K!*Rc9yoBzJiU;lT+s6LaES*BKZzQZ67XsLr`apk znv++7vs@Li@uDpL z)xf-a;P(>9V}C0`;~kx4|&0V8i1uJ&#q7?{|HHFt*G1CuNj?3c_+#STe1DT&vT3orJ27iVH7E*Q@GWX`AbDcA zC&JiY|IkKM?eQQ}d5Y(@vM4=sWy!?q*Rm%D79-e?GZ|=a^^-%WmN~9X$}PT^RAZsc zE0-Ma8}w8+&N{vm!$=o@b;_IkF|>>4@!hWWWNeiM;mE3V|G_M=3NOz+~ z^J1r>Nj-v1w%&4u2L1UBEsb~tXF&b(#Q;4hsqQE0cfIa3+#VyLhu5#lK1z0BD#Lt!6?9p%@`%X4y;Dqp({@mK8525F zFFl-bB!V9u+V~*imzPx;SIn=h93*hf?-iD%gP-R2(%Xw3PgkOUX3zB&uiC&gQRq3+ z=nYxUN0wG~^J~qh*5}N=3SYsm>*e&Le+{fU2(JYpS*BwzdJqM~B_Zn#kpslpi;BhO z*_8^9MBZ5U)WN+U533+vM7aA28V1yTtC)~fD@65WpY|gv`WP2Gbu`{j9~BA|G+gfN zjh7i2bQ_ecA7LKX75&`BSIXigTcoRPYaGDCXlx9f*?q=ydR;R7elGFp^^FAE;}?Z= zuk(8Lo7;Imr!dBzWaYJOAH3>dnXu`|^ZUZlKYp=U!GfeWK6|4?XK1EWqQiILv)(XI z&32Ku`t97xDlKv!b((OJJSDc`K@KhlADUyeI=BvR@nEUw={L_;h7WMMsk4tuHWPkn zOT;}fCeVWUhK+GwVq3_MG=1t}&s9a^@?<_rj2mdX?xfjV^yG!ghJZXgIQ69ez$Rxj zhShrFPGw?ULE;t113m?|^4Qdr<%4XwOw*_K5=Hi7wu2djw6oyt04rk=p^s?rBcI;E_{3N zHvuEg_%+r(k{Qoy!7pVzN*LtLT74y!?t<9T}e9JS+sN@E6ir!9P` zi!(4+KkW^WWnj6Fn+4wt@U~8U{kqNK)QH| 
za5;5qD?p#va-Mo^K9vQ3jyf0xUOAHcm5S50`|0Vfa2S(V%rIbVGL=QVi}Q!!cKRq7 zOJV5*0LsNvfICOEI2^!Ts#bPmY9=#Zj$)87KBSZLo{1WEsLQjN4$dFNX;XmR-*7cX zpVG95O3>D??%)jk*+~c34UH2=lePu_Pf)&(mL+Gp(*X$ATX7D_;;zQ5n1aTRLo`p# z$(sYoo&(Q3-v6Nq`1qVC3Euk?mLvJbi8smK)GCZ$D}2VsnD9&&=P>y1+{KA!SBg})F0XaW;0x#T}}e;S`Pl!$Am}9I56&7I1g|vV1td# zRGe|`)h}p;a{?a-tli=Qf&n!O^I{Z6{*9A9K7Le}@8hAwp8>R?>HufuT4zX{fgw!? z3#!n*R(lS~R0&Xvi+ln;@yN6bE#3%7iFWmAS^_f*;oA29wc5;?)d0DFTMa$>kJSJZ zc$1u(u`VQ@E^2YOz^)Ft$Y}e41(;<;5}E{Pdq!{&t69KAadPFr1tBSv#`DyU zH#aQDfW5>&wxNSgalVsN%2Lc}o;n&PEC=wSWdR7KJyqh*tmgkeSqX8{_h|9ICO4~Mblwn329(q z5&!-}AI#r2MHwhNj4H<0^SnDn{P@^uX5Q#~EjIpxokOhna=F-(Z&uRJU1^j*5$fL1 zu?aYqXHTYxdO0&DArNXWd*4*7!E>#{>&oX-`z4_Of2vdOi;L{XV>%2ce)Qf)rSOTP z!-l%J{D@NyP3pCV;YoeNdGeDFlJ}3N^Oij-6E|1xAwlKem)*dGM5nSyT4s?UZ&2$eKovVDQ>Z>+)~G?$}mm#=C;H$ zM-g#u6QzFLm|&!hmI`tW)v@fe6kbK zdri9y;%RUp9cTOE6(46KK#4l_oCoBQZG{ zlXZ$r#HU=9%PN`|+L&F;)cvbgjc9S2Z|)-oL`n&XZ=I;b-h0{BT7rSJl($k;cnt9r(VUvz*VN(hnBE0 z!v9s*SqH_@Zu=T{2o@M5FlcZB!F6B=?he6Wa2VX(Em)A?5G=R_C%C(NU|fnJ1uZWmA&2v*vQh&OX+Di#S+nS>8 zT@368T)_#ths&2HJ&re>_p4W%k(yu#f&B6)l00K%W(1)M@s0+sDDCIOo(JY_To8t| zKA~!!`E3;_ufWv$E_08|Avpakj{~z@M(WI%)Tt8M*C;pAeo?G2$fIoIh!H$LWHNp~ z)O4H`T^utjz?NMyZZKkFp^A?7OpY*b5LqAw4dw&>EE4XIogG4fze8(cCw z?_Bn&Ro5=o8K|*&JC7J#9&CepX`L~eO0hH-6uRn|;3^q>aWdvN>I8Li{ahtRwV^U8 zP|7K1{d~fB0!DJ-a|GcgRzN(;vTg8MX?lVAOPdMGpqU-Y&Y z$<4P~eylQ}r6uZS$VWSIF4W$S40l! zddNPWz$F>k^+zbA+2vy0!*0N9k@I(X{s{@3_?XC|8J(bmfwPlFq_%@MqGo8Ar(J|? 
zm5o6@WgGN0*a=VXH-EpN$qFCM0Yo8m=%CTqMg`BXy1(M$QHSOkdVQF0-FjIPWZoYL zYJTQxjLKz5K}wfI^Fi!Mm1a!%_Jl2u9NBZy*zsBe5>l1e6|&p;sS7JFjDOYQHdbCD z42#V@rk*WY)3Q@rbw4X~)mUMjBngk+@%J$*WYbPjOuU8`HB)ts?S*f-j3Mm8R<5s8 z8H{o=8dF}`W)iK#TA`|`Co01dLq6{hbH+}&EGzTKD`~1IDgFM~X9}n-a33|8M3M)XInIz{w}$#F(2Z9SR-h(L8z zi7vDkwssn<89kCtpk_ZXNq(-;+TE6<_w#PgaV3eT26ToFavtGCos~;33h~kr#dwGrro) z&4;%5Da;Q7q>$1ghz6@iYADRtTwVy0;y<&_vFst#`DBhiqBMSnhrAA2DA@O)UTYgH zNKGxtCmh`hIAz~FUJV*?jOI;1+{yGy`-440L(VKozr*Z; zRNLDUy_??+*Fci6kQhGp0UDTpgYWr(|L% zS7-}&;ozS^=!h?(LGiIA> z2NW+{M8DpnV;(X%X^v9K3CsD+hW{pwLBTNOv%d)A;S~}rxyUz9G$(lzkn{X38Tj=b z;zg+dt4jz(=aw>9UcN9QQi%moiw!arR8vTL@}z@7{~&nik!<4Gw5ahV)LW0zY1vrU zzy*xDYnWVAY()Iv-Dj#$_+@L@!#g=*?ec90R!Q&Gu+@R0T#n>Am$+TEeVL6$62DF+ z6=nMZsc(CbPLc$lKuVeP{6avEU`LBUvRDRgn}pa6tMwKyC+>!dBLkbI2!j`f*WuQ+ zValW9&Da}`u=8x@cOc0q-_{vbJV9@4BSI8|*OD z^4nikEZfnqI>e*EF8n=$^OuVzymG32hl0i*ECyxpjmj!!Hy;SCScdsZfQejFIXVk|wCjBK~E`uS5 z9eKefo=A@pFJv%GnaURDph_wjd4&!nesd5Y5I*6iOUL2*i<4PhYq&?wdGK_DoYL`< z!l?x!gJ5F6u`inK381%%B>k+4HW2mnq!_nmnj6Vc$MWjiuD}3D-ePtR3RGu0=snd1^*0(X z9vKjx#ZLP$UN6Q#wG`&jTRg|nRTf0vnqySeAy}tQ9YIDzwJ2ADxxjovJQH-rrIM0! 
zBvrInS#J8Z;|##p-bL>*QL# zM*lWUSjgBWW1K2!vorRid^yZhX$iEOybZ*N!hGj+?@hUUoZiVw$!PydAm3K;IE!5Q zU=RbgA$ftTQ|(kXW5T$$?VIg$ls+@#5Vb)~TGHs0{L#&kO(v15bOPqt13~iAx1Z!n zOix)XK)2FPC+-}pzLy-Y({|dk`4%oKDvoDpwc!oaD77!!u4n1^J)SmBy+T96lv- z$mO3Ebxf2~I;=uBM7c8H`L|E{m(w?V2Q6WoGF9iF+QR4rIMgT0_Lp0~N8DGP^Zvkj zB2RDg1ePA!sP9DhQ4fo2CQST(gnYInm9LVJ{5?H8+X1A_-JRDre}STIwHxkMK?c6J zTU&Z%A{$`1=i~$n+tQWU{;BQ$;bU@Ooatm!+5DCdyWyRX+DTl^o=8zlJW2tN!Tv-} z*a;dmJbReK|K-Nk!aZb^WW>B=!@4s?fZa)QzjBpz+;Ob#(8*B5+ZS%m%QejxYg3#g zF)pide*%cro<-;umPy-{mDw8nGvBNRc0KCrb`-o61lxj{7m=R;cq?0t^==*nIVaHR z?uJv9n-;{-m6KRg$jRR4m;vGOhy07J@ZhChpo7P0czWC=RVgD6D>75`cc#RX_XPI2 zpgsJ6W{1FB3a@Jy&d#KbP$D(Pk({U^WBMSA`nU%^qeFJbN`vfr2SKI|@3KYH>tDnY z$j=>M?{XY<7hx=++t^`bq5R_o8)Kz4u9%ap4tzZe>My{c$|W0V0U;q=uvpv0mM_pk z{Y>M1fo#0VXiYxONjQG$dw)q=6<+NfE+rxugpB|y-Mhz6UEmY)dG>58s>tvM?>^l8 z@++`4UEK}<&9`(2@Xx*iz-N^QlHJc$7nCB2^rAJazjz8FsAH5668pC5GKaE^RAP(Z zOAYWn??UFFqa`phQVIyu`|nwo(l!p0xzJ2RVC>TRyX_CaS|Q6yuOY;D)9op}kS4eE zuO|AL-*Bnvmjo1(;x{E@lZOkvpJpOcUD3+egxI=ws-z_!{StPfLJyxWI;va-LjQPG#wP%Tf)?PwilPs3*pjWEb zGUNHzx)@xc9&@OeUPvHX^``6D-2G;RSrJ`T1pI+7))ZTmIKH|DQm~C-8`ia%KM=byO$I>M> zv-Ru3c1GJON=(O=*3+zMcs@fWu$u&dq zDi=*;XKv=f&H*6|6v;uANW6qlocmLnAI}}=dBGb{jeU&oWxiEBcE+1J@e>BWB~oIg)o3xq6(jAXI73lfr2rPRvINP6X7YDQP`c^)yrfQAi{ka*gaY zf61jT<$X~PN7Pk^jHLC4z@}Ve2@;w-jPpN5PQgj>Ot6DDAIU#DcYnL9i!IxFnw{Im z3TW?N>pwgvl6d`fh=cZ10>a(u&x3{unpm_U703*dfYq-Q$IbXL+C7gDzs`W3rje1v zY^OZJPN@Qwp_oR~sx%h_DMK9zafw!?&^7ok0b*$mgm?2|NJtj=LW?J4vv0LLYdxC_`*BN9v;F@Gk2J_7jX*X9VjKYC$R-j;~_=|^1D z2|u_5^*6PLUtt`7CI(X8#br?sU`V9;Q=c9V0#@`V?LL$lDR`(d?d9LJBm|rm9L?%P zYu2WO#CxGO4nY6zee@xHm(yqJjJ(JutAJ|DsN zT__?XPPEE%y1V4=+tQEfR+UxVwc2GDKYM7^h~sbpmR1m*D7yh-VmQ4R)f>=eqV-d) zW^~HIy2}syp?X^!p?>>;9SB4x&ZVA{(>xCzJu(;2_qPsEa&t1SMbme`%*6yM-M+co z4LB#UoZCBypJ4+E%6fre(d`hWsy2!~@*WmlbV_5sH)_D=sndZcA(ib>B+*@mF0}zv zN>S=}sG(wSMc(kS)2;Bx_m-&*Gtp2R)R2pWkEVV3^T$5diVcgN7d!k!dN#(K0~sSF z-4H~Hqf5t4qw`3}t0`o(XAg`|2b?u74AGc*554}@+xS?2{43b_N#Iq@Yw}*NcFSrl 
z0&2Phs^E>xPeZp4WWL#tpLWTTF&*tm5^oYzT0{)Hsp^d_yU=Rd%!&=O@eUB%eRNId zTR83Tu%jm-1k5i+jwZp7vdl~BYoSR zRO0+h{W*(iKR*U!!9CvnTs$vjDk?paGX3m+Kp`;#D|b$hJwr~s(s(0b?OZ5FhvVH3 zvOwFp*H$Q#Adw$0;=0&WDZCL@z9GlP<7(r^Jk1^yMh=v$zDiYse{I33E7IsMfB1|D#Y~{j1uCetMXV$~=dVMnP_&Dk{RIxRcHUn_; zcsF|jdX*AE4o?Cq8%A{UMdXk=2pIkpK1JU)xHwe%a4q#g*~`)Sb!O3T;r{OP6D}bK zJku&d#x-Zz=2TMN%Aosd%09QJ{k$-1daF>UG=}GYlZO1ua^h+hg|G6*j-iD5Kbei2 zi;dHNXEs{%qAo~WIH)kOx2-6=;SfFj10H5Vta6Ltsax|gv<(WT8)={R-pucFN9WhO zXC99NIVzj!Ynzw;@Li9`%q}D2`Tf>A!Ct>YWl9R}8B%&iByJ-hoO?#QcnLRa`h$1l z2FaQ2_c!oQf^X)yh!Ud(gA-N;n^UacC)ZacpB2ke5+4v_N8yT_!;!g|Uz(|>nBkgB zbCv2Ub3f^4B};QBe-v1f3oZ|*FOr`v&b7t>3v7ejq!7LWQPc=rLmXY3;a;vpU~UyP zSE2?>=9TiYHU{S6N$DKv9OWEzcaMFXPm0Ab3tG(zO^tHiaV!}gg&#DTDNW=*Lf4j_|xD4UXXZYXauWRPt zx&I`2`4@nQ0Q_^W`*Z!f^5rl2pY1Atw*PNMmp_I75Bb-%@|XY59+kiR@E`uKRpp=D zKQ$kJInzHzmj74!@lW=jiQ-?DTj77$|L)a)V*hOFzu1li0>b}oaa9FWw0|w3|2b3s Lyn}(|zn=aJ&;y~I literal 0 HcmV?d00001 From 97785a3658b066771d439d4b46cdea5951cd7254 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 1 Dec 2023 11:26:54 +0000 Subject: [PATCH 02/30] initial structure --- .gitignore | 5 +- gtfs_skims/connectors.py | 0 gtfs_skims/graph.py | 0 gtfs_skims/preprocessing.py | 0 gtfs_skims/utils.py | 184 +++++++++++++++++++++++++++++++ gtfs_skims/variables.py | 18 +++ requirements/base.txt | 5 + tests/conftest.py | 15 +++ tests/test_data/centroids.csv | 19 ++++ tests/test_data/config_demo.yaml | 16 +-- tests/test_utils.py | 33 ++++++ 11 files changed, 282 insertions(+), 13 deletions(-) create mode 100644 gtfs_skims/connectors.py create mode 100644 gtfs_skims/graph.py create mode 100644 gtfs_skims/preprocessing.py create mode 100644 gtfs_skims/utils.py create mode 100644 gtfs_skims/variables.py create mode 100644 tests/test_data/centroids.csv create mode 100644 tests/test_utils.py diff --git a/.gitignore b/.gitignore index 06fd923..fb87f1a 100755 --- a/.gitignore +++ b/.gitignore @@ -37,4 +37,7 @@ reports/ mike-*.yml # Jupyter notebooks 
-.ipynb_checkpoints \ No newline at end of file +.ipynb_checkpoints + +sandbox.py +tests/test_data/outputs/ \ No newline at end of file diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py new file mode 100644 index 0000000..e69de29 diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py new file mode 100644 index 0000000..e69de29 diff --git a/gtfs_skims/preprocessing.py b/gtfs_skims/preprocessing.py new file mode 100644 index 0000000..e69de29 diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py new file mode 100644 index 0000000..a5ab706 --- /dev/null +++ b/gtfs_skims/utils.py @@ -0,0 +1,184 @@ +from __future__ import annotations +from dataclasses import dataclass +import logging +import os +from pathlib import Path +from typing import Optional +import yaml +from zipfile import ZipFile + +import pandas as pd + + +def ts_to_sec(x: str) -> int: + """Convert a hh:mm:ss timestamp to seconds from midnight. + + Args: + x (str): Timestamp + + Returns: + int: Seconds from midnight + """ + s = [int(i) for i in x.split(':')] + return 3600*s[0]+60*s[1]+s[2] + + +def get_logger(path_output: Optional[str] = None) -> logging.Logger: + """Get the library logger. + + Args: + path_output (Optional[str], optional): Path to save the logs. Defaults to None. + + Returns: + logging.Logger: Logger. 
+ """ + logger = logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + handler = logging.StreamHandler() + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + handler.setFormatter(formatter) + if len(logger.handlers) == 0: + logger.addHandler(handler) + else: + logger.handlers[0] = handler + + if path_output is not None: + parent_dir = Path(path_output).parent.absolute() + if not os.path.exists(parent_dir): + os.makedirs(parent_dir) + + file_handler = logging.FileHandler(path_output, mode='w') + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + return logger + + +@dataclass +class Config: + """Config file + + Example config file: + + ``` + paths: + path_gtfs: ./iow-bus-gtfs.zip + path_outputs: /mnt/efs/otp/gtfs_transfers/skims_iow + path_origins: ./centroids.csv # path to the origin points + path_destinations: ./centroids.csv # path to the destination points + + settings: + calendar_date : 20190515 # yyyymmdd | Date for filtering the GTFS file. + start_s : 32400 # sec | Start time of the journey. + end_s : 41400 # sec | Max end time of a journey. + walk_distance_threshold : 2000 # m | Max walk distance in a leg + walk_speed : 4.5 # kph | Walking speed + crows_fly_factor : 1.3 # Conversion factor from euclidean to routed distances + max_transfer_time : 1800 # Max combined time of walking and waiting (sec) + k : 500 # max nearest neighbours when calculating distances + max_wait : 1800 # sec | Max wait time at a stop + bounding_box : null + + steps: + - preprocessing + - connectors + - graph + ``` + + """ + path_gtfs: str + path_outputs: str + path_origins: str + path_destinations: str + calendar_date: int + crows_fly_factor: float + max_transfer_time: int + k: int + end_s: int + bounding_box: dict + max_wait: int + start_s: int + walk_distance_threshold: int + walk_speed: float + steps: list + + @classmethod + def from_yaml(cls, path: str) -> Config: + """Construct class from a config yaml file. 
+ + Args: + path (str): Path to the yaml config. + + Returns: + Config: Config object + """ + with open(path, 'r') as f: + config = yaml.safe_load(f) + config_flat = { + **config['paths'], + **config['settings'], + 'steps': config['steps'] + } + return cls(**config_flat) + + def __repr__(self) -> str: + s = 'Config file\n' + s += '-'*50 + '\n' + s += yaml.dump(self.__dict__) + return s + + +@dataclass +class GTFSData: + calendar: pd.DataFrame + routes: pd.DataFrame + stops: pd.DataFrame + stop_times: pd.DataFrame + trips: pd.DataFrame + + @classmethod + def from_gtfs(cls, path_gtfs: str) -> GTFSData: + """Load GTFS tables from a standard zipped GTFS file. + + Args: + path_gtfs (str): Path to a zipped GTFS dataset. + + Returns: + GTFSData: GTFS data object. + """ + data = {} + with ZipFile(path_gtfs, 'r') as zf: + for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + with zf.open(f'{name}.txt') as f: + data[name] = pd.read_csv(f, low_memory=False) + return cls(**data) + + @classmethod + def from_parquet(cls, path: str) -> GTFSData: + """Construct class from pre-processed GTFS tables in Parquet format. + + Args: + path (str): Path to tables. + + Returns: + GTFSData: GTFS data object. + """ + data = {} + for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + data[name] = pd.read_parquet( + os.path.join(path, f'{name}.parquet.gzip')) + return cls(**data) + + def save(self, path_outputs: str) -> None: + """Export all tables in zipped parquet format. + + Args: + path_outputs (str): Directory to save outputs. 
+ """ + if not os.path.exists(path_outputs): + os.makedirs(path_outputs) + + for k, v in self.__dict__.items(): + v.to_parquet(os.path.join( + path_outputs, f'{k}.parquet.gzip'), compression='gzip') diff --git a/gtfs_skims/variables.py b/gtfs_skims/variables.py new file mode 100644 index 0000000..f1e9591 --- /dev/null +++ b/gtfs_skims/variables.py @@ -0,0 +1,18 @@ +# route types lookup +# source: https://developers.google.com/transit/gtfs/reference#routestxt +# and https://developers.google.com/transit/gtfs/reference/extended-route-types +ROUTE_TYPES = { + 0: 'tram', # Tram, Streetcar, Light rail. + 1: 'underground', # Subway, Metro. + 2: 'rail', # Rail. Used for intercity or long-distance travel. + 3: 'bus', # Bus. Used for short- and long-distance bus routes. + 4: 'ferry', # Ferry. Used for short- and long-distance boat service. + 5: 'cable', + 6: 'cable aerial', + 7: 'furnicular', # Funicular. Any rail system designed for steep inclines. + 11: 'trolley', # Trolleybus. + 12: 'monorail', # Monorail. 
+ 200: 'coach', # Coach Service + 401: 'undergound', # Metro Service + 402: 'underground', # Underground Service +} \ No newline at end of file diff --git a/requirements/base.txt b/requirements/base.txt index 9c9350d..b1ccdb9 100755 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,3 +1,8 @@ # this dependency exists so that the base file is not empty # it was chosen since it is a dependency that is included in any python environment already +fastparquet +graph-tool +numpy +pandas +pyproj zipp \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index a6de8b3..8f6127a 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,9 +7,15 @@ def test_content(response): assert response.content ``` """ +import os +from pathlib import Path import pytest +from gtfs_skims.utils import Config, GTFSData + +TEST_DATA_DIR = os.path.join(Path(__file__).parent, 'test_data') + @pytest.fixture def response(): @@ -19,3 +25,12 @@ def response(): """ # import requests # return requests.get('https://github.com/arup-group/cookiecutter-pypackage') + + +@pytest.fixture +def config(): + return Config.from_yaml(os.path.join(TEST_DATA_DIR, 'config_demo.yaml')) + +@pytest.fixture +def gtfs_data(): + return GTFSData.from_gtfs(os.path.join(TEST_DATA_DIR, 'iow-bus-gtfs.zip')) diff --git a/tests/test_data/centroids.csv b/tests/test_data/centroids.csv new file mode 100644 index 0000000..5fbb1e0 --- /dev/null +++ b/tests/test_data/centroids.csv @@ -0,0 +1,19 @@ +name,longitude,latitude +E02003587,-1.155884355526687,50.72185610082279 +E02003586,-1.173878456845198,50.72301086140903 +E02003585,-1.224069337360162,50.72613466907799 +E02003584,-1.158508858308219,50.72873880022016 +E02003583,-1.28149160450734,50.75184512743099 +E02003582,-1.31006711916123,50.75340385923516 +E02003581,-1.297514083246226,50.75714214474556 +E02003589,-1.302344772901295,50.70267302231341 +E02003588,-1.284284792950486,50.703256856445805 +E02003597,-1.283759136860344,50.61568691131167 
+E02003596,-1.175831901564544,50.63446808149097 +E02003595,-1.180359149209377,50.64417605386147 +E02003594,-1.154986228595743,50.659399194734654 +E02003593,-1.386537290710913,50.68165562633486 +E02003592,-1.526357692381475,50.68396619141156 +E02003591,-1.30109588996732,50.69411141243501 +E02003590,-1.096598389236477,50.69424449742397 +E02003598,-1.210809598549173,50.59781051582961 diff --git a/tests/test_data/config_demo.yaml b/tests/test_data/config_demo.yaml index 66db500..85e810a 100644 --- a/tests/test_data/config_demo.yaml +++ b/tests/test_data/config_demo.yaml @@ -1,8 +1,8 @@ paths: - path_gtfs: /mnt/efs/otp/gtfs_transfers/iow-bus-gtfs.zip + path_gtfs: ./iow-bus-gtfs.zip path_outputs: /mnt/efs/otp/gtfs_transfers/skims_iow - path_zone_centroids: /mnt/efs/zones/msoa_centroids.geojson # path to zone centroids to skim - + path_origins: ./centroids.csv # path to the origin points + path_destinations: ./centroids.csv # path to the destination points settings: calendar_date : 20190515 # yyyymmdd | Date for filtering the GTFS file. 
@@ -19,12 +19,4 @@ settings: steps: - preprocessing - connectors - - graph - -# Settings for AWS batch (Optional) -aws_job_definition: - image: "815306348607.dkr.ecr.eu-west-1.amazonaws.com/ukpop:latest" - memory: 32000 - vcpus: 4 - job_definition_name: "gtfs-ukpop11" - job_queue: "general_purpose_queue" \ No newline at end of file + - graph \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..88489f5 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,33 @@ +import os + +import pandas as pd + +from gtfs_skims import utils + + +def test_parse_timestamp(): + assert utils.ts_to_sec('00:00:00') == 0 + assert utils.ts_to_sec('10:01:01') == 36061 + + +def test_get_logger(tmpdir): + logger = utils.get_logger(os.path.join(tmpdir, 'logs', 'log.log')) + logger.info('test') + + +def test_load_config(config): + 'path_gtfs' in config.__dict__ + + +def test_load_gtfs(gtfs_data): + for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + assert isinstance(getattr(gtfs_data, x), pd.DataFrame) + + +def test_cache_gtfs(gtfs_data, tmpdir): + gtfs_data.save(tmpdir) + gtfs_cached = utils.GTFSData.from_parquet(tmpdir) + for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + pd.testing.assert_frame_equal( + getattr(gtfs_data, x), getattr(gtfs_cached, x) + ) From ea8c3da7a020b3313a9ad753bbce88f3129dd5c0 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 1 Dec 2023 17:31:30 +0000 Subject: [PATCH 03/30] filtering --- README.md | 2 +- gtfs_skims/preprocessing.py | 42 ++++++++++++++++++++++++++++++++++++ gtfs_skims/utils.py | 15 +++++++++++++ resources/logos/title.png | Bin 0 -> 750 bytes tests/test_preprocessing.py | 9 ++++++++ tests/test_utils.py | 4 ++++ 6 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 resources/logos/title.png create mode 100644 tests/test_preprocessing.py diff --git a/README.md b/README.md index db9a868..d6ddf93 100755 --- a/README.md +++ b/README.md 
@@ -2,7 +2,7 @@ ![gtfs_skims](resources/logos/title.png) -# gtfs-skims (gtfs_skims) +# Argo (gtfs_skims) [![Daily CI Build](https://github.com/arup-group/gtfs_skims/actions/workflows/daily-scheduled-ci.yml/badge.svg)](https://github.com/arup-group/gtfs_skims/actions/workflows/daily-scheduled-ci.yml) [![Documentation](https://github.com/arup-group/gtfs_skims/actions/workflows/pages/pages-build-deployment/badge.svg?branch=gh-pages)](https://arup-group.github.io/gtfs_skims) diff --git a/gtfs_skims/preprocessing.py b/gtfs_skims/preprocessing.py index e69de29..9b4d8b9 100644 --- a/gtfs_skims/preprocessing.py +++ b/gtfs_skims/preprocessing.py @@ -0,0 +1,42 @@ + + +from gtfs_skims.utils import GTFSData, get_weekday + + +def filter_day(data: GTFSData, date: int) -> None: + """Filter the GTFS for a specific date in the calendar. + + Args: + data (Data): GTFS data object + date (int): Date as yyyymmdd + """ + weekday = get_weekday(date) + data.calendar = data.calendar[ + (data.calendar['start_date'] <= date) & + (data.calendar['end_date'] >= date) & + (data.calendar[weekday] == 1) + ] + + data.trips = data.trips[ + data.trips['service_id'].isin( + set(data.calendar['service_id']) + ) + ] + + data.routes = data.routes[ + data.routes['route_id'].isin( + set(data.trips['route_id']) + ) + ] + + data.stop_times = data.stop_times[ + data.stop_times['trip_id'].isin( + set(data.trips['trip_id']) + ) + ] + + data.stops = data.stops[ + data.stops['stop_id'].isin( + set(data.stop_times['stop_id']) + ) + ] diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py index a5ab706..d646cd5 100644 --- a/gtfs_skims/utils.py +++ b/gtfs_skims/utils.py @@ -1,5 +1,6 @@ from __future__ import annotations from dataclasses import dataclass +from datetime import datetime import logging import os from pathlib import Path @@ -23,6 +24,20 @@ def ts_to_sec(x: str) -> int: return 3600*s[0]+60*s[1]+s[2] +def get_weekday(date: int) -> str: + """Get the weekday of a date + + Args: + date (int): Date as yyyymmdd 
+ + Returns: + str: Day name + """ + weekday = datetime.strptime(str(date), '%Y%m%d') + weekday = datetime.strftime(weekday, '%A').lower() + return weekday + + def get_logger(path_output: Optional[str] = None) -> logging.Logger: """Get the library logger. diff --git a/resources/logos/title.png b/resources/logos/title.png new file mode 100644 index 0000000000000000000000000000000000000000..24ad2cf1780872fcb2d1f0568fc4d1f5b29bf3bd GIT binary patch literal 750 zcmeAS@N?(olHy`uVBq!ia0vp^2_VeD3?#3*wSy!Wi-X*q7}lMWc?smOq&xaLGB9lH z=l+w(3gjCH_=LCuX;Q&rR=1NtJpv^`e!&a^3I+}V2?Y)P^Vjb`fB(I*R8tpFjUkfjqFSsr|NQgMKWp~Y@A|NSS;uBwNz++#s{`+hW$WT29yuM~_Ca>L-;Crp zEx*O?bFP$MJK@(x;h_D(LH7Fu4;k0{aWBnpy=3n6a=+M$r7a&8q-zAkb6)BH=*R!< zn4w^leCsv#gw>{!owd41DM@x%rFO<%%)8FQ|^ zKkw-60C|>w5sU0zszmFXTb;DGxyIhOttE5Qp{ki`g0K3%t)1{Y>_n}$%=_$x-?j$) zj*@t}_&}}5j{ZXt@hmU96>0_De(NO6XMMBT;NJSCcj1oTR~Ias&wBfO)7$iiG4@w? z)l~(S{8gFr<~Pv0d-itUytn(ouFp!G*7`!b-Ty3^oO5qp$)worTZIobeLwy8srV+* zXC>Ji4w>3+cG@>Z|`Ra*k>HrE%fY+9jNOzPM#Fy}$aN{~X8HYsoF2ju{=gxKHp$*ix1%wFKmFB!u tPkj5!^TjVui|gNi*4P1~{I~P$W%9hCzL#0wR+NAerl+f)%Q~loCIFwcPF?^2 literal 0 HcmV?d00001 diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py new file mode 100644 index 0000000..f1de471 --- /dev/null +++ b/tests/test_preprocessing.py @@ -0,0 +1,9 @@ +from gtfs_skims import preprocessing + + +def test_filter_date(gtfs_data): + a = 1 + assert 14 in gtfs_data.calendar.service_id.values + preprocessing.filter_day(gtfs_data, 20180507) + assert list(gtfs_data.calendar.service_id) == [14] + assert set(gtfs_data.trips['service_id']) == set([14]) diff --git a/tests/test_utils.py b/tests/test_utils.py index 88489f5..5585b62 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -15,6 +15,10 @@ def test_get_logger(tmpdir): logger.info('test') +def test_weekday(): + assert utils.get_weekday(20231201) == 'friday' + + def test_load_config(config): 'path_gtfs' in config.__dict__ From 42c9f89661386074c92135d394d2d8ddd63b0f43 Mon Sep 17 00:00:00 
2001 From: "Theodore.Chatziioannou" Date: Mon, 4 Dec 2023 17:54:29 +0000 Subject: [PATCH 04/30] complete preprocessing --- gtfs_skims/preprocessing.py | 126 +++++++++++++++++++++++- test_data/outputs/log_preprocessing.log | 1 + tests/test_data/config_demo.yaml | 8 +- tests/test_preprocessing.py | 43 ++++++++ 4 files changed, 173 insertions(+), 5 deletions(-) create mode 100644 test_data/outputs/log_preprocessing.log diff --git a/gtfs_skims/preprocessing.py b/gtfs_skims/preprocessing.py index 9b4d8b9..aa66e97 100644 --- a/gtfs_skims/preprocessing.py +++ b/gtfs_skims/preprocessing.py @@ -1,6 +1,9 @@ +import os +import pyproj -from gtfs_skims.utils import GTFSData, get_weekday +from gtfs_skims.utils import ( + GTFSData, Config, get_weekday, ts_to_sec, get_logger) def filter_day(data: GTFSData, date: int) -> None: @@ -40,3 +43,124 @@ def filter_day(data: GTFSData, date: int) -> None: set(data.stop_times['stop_id']) ) ] + + +def filter_time(data: GTFSData, start_time: int, end_time: int) -> None: + """Filter the GTFS for a specified time window. 
+ + Args: + data (Data): GTFS data object + start_time (int): Start of the time window (seconds from midnight) + end_time (int): End of the time window (seconds from midnight) + """ + # filter stop times + data.stop_times['departure_s'] = data.stop_times['departure_time'].apply( + ts_to_sec) + data.stop_times['arrival_s'] = data.stop_times['arrival_time'].apply( + ts_to_sec) + data.stop_times = data.stop_times[ + (data.stop_times['arrival_s'] >= start_time) & + (data.stop_times['departure_s'] <= end_time) + ] + + # filter stops + data.stops = data.stops[data.stops['stop_id'].isin( + set(data.stop_times['stop_id']) + )] + + # filter trips + data.trips = data.trips[data.trips['trip_id'].isin( + set(data.stop_times['trip_id']) + )] + + # filter routes + data.routes = data.routes[data.routes['route_id'].isin( + set(data.trips['route_id']) + )] + + +def add_coordinates(data: GTFSData) -> None: + """Add BNG coordinates to the stop and stoptime tables. + + Args: + data (Data): Data object. + """ + transformer = pyproj.Transformer.from_crs( + pyproj.transformer.CRS('epsg:4326'), + pyproj.transformer.CRS('epsg:27700'), always_xy=True) + + data.stops['x'], data.stops['y'] = transformer.transform( + data.stops['stop_lon'], data.stops['stop_lat'] + ) + + data.stops['x'] = data.stops['x'].round().map(int) + data.stops['y'] = data.stops['y'].round().map(int) + + data.stop_times['x'] = data.stop_times['stop_id'].map( + data.stops.set_index('stop_id')['x'] + ) + data.stop_times['y'] = data.stop_times['stop_id'].map( + data.stops.set_index('stop_id')['y'] + ) + + +def filter_bounding_box(data: GTFSData, xmin: int, xmax: int, ymin: int, ymax: int) -> None: + """Filter a GTFS with a bounding box. Coordinates are using the BNG projection. + + Args: + data (Data): Data object. + xmin (int): Min Easting. + xmax (int): Max Easting. + ymin (int): Min Northing. 
+ ymax (int): Max Northing + """ + data.stops = data.stops[ + (data.stops['x'] >= xmin) & + (data.stops['x'] <= xmax) & + (data.stops['y'] >= ymin) & + (data.stops['y'] <= ymax) + ] + + # filter stop times + data.stop_times = data.stop_times[ + data.stop_times['stop_id'].isin( + set(list(data.stops['stop_id'])) + ) + ] + + # filter trips + data.trips = data.trips[data.trips['trip_id'].isin( + set(data.stop_times['trip_id']) + )] + + # filter routes + data.routes = data.routes[data.routes['route_id'].isin( + set(data.trips['route_id']) + )] + + +def main(path_config: str) -> None: + """Run the preprocessing pipeline + + Args: + path_config (str): Path to the config file. + """ + config = Config.from_yaml(path_config) + logger = get_logger(os.path.join( + config.path_outputs, 'log_preprocessing.log')) + logger.info('Reading files...') + data = GTFSData.from_gtfs(path_gtfs=config.path_gtfs) + + logger.info('Time filtering..') + filter_day(data, config.calendar_date) + filter_time(data, config.start_s, config.end_s) + add_coordinates(data) + + if config.bounding_box is not None: + logger.info('Cropping to bounding box..') + filter_bounding_box(data, **config.bounding_box) + + logger.info(f'Saving outputs at {config.path_outputs}') + data.save(config.path_outputs) + + logger.info(f'Preprocessing complete.') diff --git a/test_data/outputs/log_preprocessing.log b/test_data/outputs/log_preprocessing.log new file mode 100644 index 0000000..cade8fa --- /dev/null +++ b/test_data/outputs/log_preprocessing.log @@ -0,0 +1 @@ +2023-12-04 17:44:51,834 - gtfs_skims.utils - INFO - Reading files... 
diff --git a/tests/test_data/config_demo.yaml b/tests/test_data/config_demo.yaml index 85e810a..8bc2241 100644 --- a/tests/test_data/config_demo.yaml +++ b/tests/test_data/config_demo.yaml @@ -1,8 +1,8 @@ paths: - path_gtfs: ./iow-bus-gtfs.zip - path_outputs: /mnt/efs/otp/gtfs_transfers/skims_iow - path_origins: ./centroids.csv # path to the origin points - path_destinations: ./centroids.csv # path to the destination points + path_gtfs: ./tests/test_data/iow-bus-gtfs.zip + path_outputs: ./tests/test_data/outputs + path_origins: ./tests/test_data/centroids.csv # path to the origin points + path_destinations: ./tests/test_data/centroids.csv # path to the destination points settings: calendar_date : 20190515 # yyyymmdd | Date for filtering the GTFS file. diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py index f1de471..e5d34db 100644 --- a/tests/test_preprocessing.py +++ b/tests/test_preprocessing.py @@ -1,3 +1,6 @@ +import os +from pathlib import Path + from gtfs_skims import preprocessing @@ -7,3 +10,43 @@ def test_filter_date(gtfs_data): preprocessing.filter_day(gtfs_data, 20180507) assert list(gtfs_data.calendar.service_id) == [14] assert set(gtfs_data.trips['service_id']) == set([14]) + + +def test_filter_time(gtfs_data): + start_time = 9*3600 + end_time = 10*3600 + preprocessing.filter_time(gtfs_data, start_time, end_time) + assert gtfs_data.stop_times['arrival_s'].min() >= start_time + assert gtfs_data.stop_times['departure_s'].max() <= end_time + + +def test_projected_coords_within_bounds(gtfs_data): + preprocessing.add_coordinates(gtfs_data) + # check that the BNG coordinates fall within an Isle-of-Wight bounding box + xmin, ymin = 423104, 69171 + xmax, ymax = 471370, 101154 + + assert gtfs_data.stops['x'].min() > xmin + assert gtfs_data.stops['x'].max() < xmax + assert gtfs_data.stops['y'].min() > ymin + assert gtfs_data.stops['y'].max() < ymax + + +def test_within_bounding_box(gtfs_data): + preprocessing.add_coordinates(gtfs_data) + + # 
filter for Cowes + xmin, ymin = 447477, 92592 + xmax, ymax = 451870, 96909 + assert gtfs_data.stops['x'].min() < xmin + preprocessing.filter_bounding_box( + gtfs_data, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax) + + assert gtfs_data.stops['x'].min() > xmin + assert gtfs_data.stops['x'].max() < xmax + assert gtfs_data.stops['y'].min() > ymin + assert gtfs_data.stops['y'].max() < ymax + +def test_run_preprocessing(): + path_config = os.path.join(Path(__file__).parent, 'test_data', 'config_demo.yaml') + preprocessing.main(path_config) From 6219edde8e7dd7e5ed0f0213d3fa43c40f121dc2 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Mon, 4 Dec 2023 17:55:30 +0000 Subject: [PATCH 05/30] remove folder --- test_data/outputs/log_preprocessing.log | 1 - 1 file changed, 1 deletion(-) delete mode 100644 test_data/outputs/log_preprocessing.log diff --git a/test_data/outputs/log_preprocessing.log b/test_data/outputs/log_preprocessing.log deleted file mode 100644 index cade8fa..0000000 --- a/test_data/outputs/log_preprocessing.log +++ /dev/null @@ -1 +0,0 @@ -2023-12-04 17:44:51,834 - gtfs_skims.utils - INFO - Reading files... 
From 86a2045bcee2af7dc13af5165409fb278647e582 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Thu, 7 Dec 2023 18:35:20 +0000 Subject: [PATCH 06/30] connectors --- gtfs_skims/connectors.py | 54 +++++++++++++++++++++++++++++++ gtfs_skims/preprocessing.py | 20 ++++++++---- gtfs_skims/utils.py | 3 ++ tests/conftest.py | 2 +- tests/test_connectors.py | 55 ++++++++++++++++++++++++++++++++ tests/test_data/config_demo.yaml | 1 + tests/test_preprocessing.py | 13 ++++++-- 7 files changed, 137 insertions(+), 11 deletions(-) create mode 100644 tests/test_connectors.py diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index e69de29..c52dda3 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -0,0 +1,54 @@ +import os + +import numpy as np +from scipy.spatial import KDTree + +from gtfs_skims.utils import Config, GTFSData, get_logger + + +def query_pairs(coords: np.array, maxdist: float) -> np.array: + ids = coords[:, 2].argsort() + + dtree = KDTree(coords[ids]) + connectors = dtree.query_pairs(r=maxdist, output_type='ndarray', p=2) + + return ids[connectors] + + +def query_pairs_filter(coords: np.array, maxdist: float) -> np.array: + ods = query_pairs(coords, maxdist) + coords_o = coords[ods[:, 0]] + coords_d = coords[ods[:, 1]] + + dcoords = coords_d - coords_o + walk = (dcoords[:, :2]**2).sum(1)**0.5 # euclidean distance on xy + wait = dcoords[:, 2] - walk + + cond = (wait > 0) & ((walk+wait) <= maxdist) + + return ods[cond] + + +def get_access_connectors(data: GTFSData, config: Config): + # ... query ball tree + pass + + +def get_egress_connectors(data: GTFSData, config: Config): + # ... 
query ball tree + pass + + +def main(data: GTFSData, config: Config): + logger = get_logger(os.path.join( + config.path_outputs, 'log_connectors.log')) + + # get feasible connections + logger.info('Getting transfer connectors...') + transfer_connectors = get_transfer_connectors(data, config) + logger.info('Getting access connectors...') + access_connectors = get_access_connectors(data, config) + logger.info('Getting egress connectors...') + egress_connectors = get_egress_connectors(data, config) + + # save diff --git a/gtfs_skims/preprocessing.py b/gtfs_skims/preprocessing.py index aa66e97..f637d06 100644 --- a/gtfs_skims/preprocessing.py +++ b/gtfs_skims/preprocessing.py @@ -79,15 +79,16 @@ def filter_time(data: GTFSData, start_time: int, end_time: int) -> None: )] -def add_coordinates(data: GTFSData) -> None: +def add_coordinates(data: GTFSData, epsg: int = 27700) -> None: """Add BNG coordinates to the stop and stoptime tables. Args: data (Data): Data object. + epsg (int): The target coordinate system """ transformer = pyproj.Transformer.from_crs( pyproj.transformer.CRS('epsg:4326'), - pyproj.transformer.CRS('epsg:27700'), always_xy=True) + pyproj.transformer.CRS(f'epsg:{epsg}'), always_xy=True) data.stops['x'], data.stops['y'] = transformer.transform( data.stops['stop_lon'], data.stops['stop_lat'] @@ -139,22 +140,25 @@ def filter_bounding_box(data: GTFSData, xmin: int, xmax: int, ymin: int, ymax: i )] -def main(path_config: str) -> None: - """Run the preprocessing pipeline +def main(config: Config) -> GTFSData: + """Run the preprocessing pipeline and save resulting tables to disk. Args: - path_config (str): Path to the config file. + config (Config): Path Config object. + + Returns: + GTFSData: Pre-processed GTFS data object. 
""" - config = Config.from_yaml(path_config) logger = get_logger(os.path.join( config.path_outputs, 'log_preprocessing.log')) + logger.info('Reading files...') data = GTFSData.from_gtfs(path_gtfs=config.path_gtfs) logger.info('Time filtering..') filter_day(data, config.calendar_date) filter_time(data, config.start_s, config.end_s) - add_coordinates(data) + add_coordinates(data, epsg=config.epsg_centroids) if config.bounding_box is not None: logger.info('Cropping to bounding box..') @@ -164,3 +168,5 @@ def main(path_config: str) -> None: data.save(config.path_outputs) logger.info(f'Preprocessing complete.') + + return data diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py index d646cd5..802c6bf 100644 --- a/gtfs_skims/utils.py +++ b/gtfs_skims/utils.py @@ -94,6 +94,8 @@ class Config: k : 500 # max nearest neighbours when calculating distances max_wait : 1800 # sec | Max wait time at a stop bounding_box : null + epsg_centroids: 27700 # coordinate system of the centroids file. Needs to be Cartesian and in meters. 
+ steps: - preprocessing @@ -112,6 +114,7 @@ class Config: k: int end_s: int bounding_box: dict + epsg_centroids: int max_wait: int start_s: int walk_distance_threshold: int diff --git a/tests/conftest.py b/tests/conftest.py index 8f6127a..2022fb1 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -33,4 +33,4 @@ def config(): @pytest.fixture def gtfs_data(): - return GTFSData.from_gtfs(os.path.join(TEST_DATA_DIR, 'iow-bus-gtfs.zip')) + return GTFSData.from_gtfs(os.path.join(TEST_DATA_DIR, 'iow-bus-gtfs.zip')) \ No newline at end of file diff --git a/tests/test_connectors.py b/tests/test_connectors.py new file mode 100644 index 0000000..bd9de9a --- /dev/null +++ b/tests/test_connectors.py @@ -0,0 +1,55 @@ +import itertools + +import numpy as np +import pytest + +from gtfs_skims import connectors + + +@pytest.fixture() +def points(): + p = np.arange(-20, 20, 2.5) + coords = np.array([(x, y, z) for x, y, z in itertools.product(p, p, p)]) + return coords + + +def find_index(coords, x, y, z): + idx = np.where(np.all(coords == np.array([x, y, z]), axis=1))[0][0] + return idx + + +def get_valid_points(coords, source_idx, max_trasfer_dist): + dcoords = coords - coords[source_idx] + walk = (dcoords[:, :2]**2).sum(1)**0.5 # euclidean distance on xy + wait = dcoords[:, 2] - walk + + is_valid = (wait > 0) & ((walk+wait) <= max_trasfer_dist) + + return is_valid + + +@pytest.mark.parametrize('source', [(0, 0, 0), (2.5, 2.5, 2.5), (-2.5, 0, 2.5)]) +def test_query_all_valid_included(points, source): + """ All valid points are included in the query results """ + source_idx = find_index(points, *source) + maxdist = 10 + radius = maxdist * (2**0.5) + # ods = connectors.query_pairs(points, radius) + ods = connectors.query_pairs_filter(points, radius) + is_valid = get_valid_points(points, source_idx, maxdist) + + ds = ods[ods[:, 0] == source_idx, 1] + assert is_valid[ds].sum() == is_valid.sum() + + +@pytest.mark.parametrize('source', [(0, 0, 0), (2.5, 2.5, 2.5), (-2.5, 0, 2.5)]) 
+def test_query_all_included_valid(points, source): + """ All results from the query are valid """ + source_idx = find_index(points, *source) + maxdist = 10 + radius = maxdist * (2**0.5) + ods = connectors.query_pairs_filter(points, radius) + is_valid = get_valid_points(points, source_idx, maxdist) + + ds = ods[ods[:, 0] == source_idx, 1] + assert all(is_valid[ds]) diff --git a/tests/test_data/config_demo.yaml b/tests/test_data/config_demo.yaml index 8bc2241..e5b6bdf 100644 --- a/tests/test_data/config_demo.yaml +++ b/tests/test_data/config_demo.yaml @@ -15,6 +15,7 @@ settings: k : 500 # max nearest neighbours when calculating distances max_wait : 1800 # sec | Max wait time at a stop bounding_box : null + epsg_centroids: 27700 # coordinate system of the centroids file. Needs to be Cartesian and in meters. steps: - preprocessing diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py index e5d34db..acba58e 100644 --- a/tests/test_preprocessing.py +++ b/tests/test_preprocessing.py @@ -1,5 +1,6 @@ import os from pathlib import Path +import pytest from gtfs_skims import preprocessing @@ -47,6 +48,12 @@ def test_within_bounding_box(gtfs_data): assert gtfs_data.stops['y'].min() > ymin assert gtfs_data.stops['y'].max() < ymax -def test_run_preprocessing(): - path_config = os.path.join(Path(__file__).parent, 'test_data', 'config_demo.yaml') - preprocessing.main(path_config) + +def test_run_preprocessing_demo(config, tmpdir): + path_outputs = os.path.join(tmpdir, 'outputs') + config.path_outputs = path_outputs + preprocessing.main(config) + for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + assert os.path.exists( + os.path.join(path_outputs, f'{x}.parquet.gzip') + ) From f83d44dbbcf7e830d6df2cdfcfe2d61fb1695983 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 8 Dec 2023 09:18:00 +0000 Subject: [PATCH 07/30] connectors tests --- gtfs_skims/connectors.py | 20 +++++++++++++------- tests/test_connectors.py | 5 ++--- 2 files 
changed, 15 insertions(+), 10 deletions(-) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index c52dda3..4b371c2 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -6,27 +6,33 @@ from gtfs_skims.utils import Config, GTFSData, get_logger -def query_pairs(coords: np.array, maxdist: float) -> np.array: +def query_pairs(coords: np.array, radius: float) -> np.array: ids = coords[:, 2].argsort() dtree = KDTree(coords[ids]) - connectors = dtree.query_pairs(r=maxdist, output_type='ndarray', p=2) + connectors = dtree.query_pairs(r=radius, output_type='ndarray', p=2) return ids[connectors] def query_pairs_filter(coords: np.array, maxdist: float) -> np.array: - ods = query_pairs(coords, maxdist) - coords_o = coords[ods[:, 0]] - coords_d = coords[ods[:, 1]] + radius = maxdist * (2**0.5) + connectors = query_pairs(coords, radius) + coords_o = coords[connectors[:, 0]] + coords_d = coords[connectors[:, 1]] dcoords = coords_d - coords_o walk = (dcoords[:, :2]**2).sum(1)**0.5 # euclidean distance on xy wait = dcoords[:, 2] - walk - cond = (wait > 0) & ((walk+wait) <= maxdist) + is_feasible = (wait > 0) & ((walk+wait) <= maxdist) + connectors = connectors[is_feasible] - return ods[cond] + return connectors + + +def get_transfer_connectors(data: GTFSData, config: Config): + pass def get_access_connectors(data: GTFSData, config: Config): diff --git a/tests/test_connectors.py b/tests/test_connectors.py index bd9de9a..386977a 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -34,7 +34,6 @@ def test_query_all_valid_included(points, source): source_idx = find_index(points, *source) maxdist = 10 radius = maxdist * (2**0.5) - # ods = connectors.query_pairs(points, radius) ods = connectors.query_pairs_filter(points, radius) is_valid = get_valid_points(points, source_idx, maxdist) @@ -47,8 +46,8 @@ def test_query_all_included_valid(points, source): """ All results from the query are valid """ source_idx = find_index(points, *source) 
maxdist = 10 - radius = maxdist * (2**0.5) - ods = connectors.query_pairs_filter(points, radius) + + ods = connectors.query_pairs_filter(points, maxdist) is_valid = get_valid_points(points, source_idx, maxdist) ds = ods[ods[:, 0] == source_idx, 1] From cc2072dfca6de492d72a535b2fb4000dbf97acbb Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 8 Dec 2023 11:38:38 +0000 Subject: [PATCH 08/30] transfer object --- gtfs_skims/connectors.py | 107 ++++++++++++++++++++++++++++++++++----- tests/test_connectors.py | 17 +++++-- 2 files changed, 105 insertions(+), 19 deletions(-) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index 4b371c2..a4e4afc 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -1,4 +1,8 @@ +from __future__ import annotations +from dataclasses import dataclass +from functools import cached_property import os +from typing import Optional import numpy as np from scipy.spatial import KDTree @@ -7,6 +11,17 @@ def query_pairs(coords: np.array, radius: float) -> np.array: + """Get origin-destination pairs between points, within a radius. + The connections are forward-looking in z: ie the destination point + has always greater z coordinate than the origin point. + + Args: + coords (np.array): Point coordinates (x, y, z) + radius (float): Maximum distance between points + + Returns: + np.array: Feasible connections between points. 
+ """ ids = coords[:, 2].argsort() dtree = KDTree(coords[ids]) @@ -15,20 +30,84 @@ def query_pairs(coords: np.array, radius: float) -> np.array: return ids[connectors] -def query_pairs_filter(coords: np.array, maxdist: float) -> np.array: - radius = maxdist * (2**0.5) - connectors = query_pairs(coords, radius) - coords_o = coords[connectors[:, 0]] - coords_d = coords[connectors[:, 1]] - - dcoords = coords_d - coords_o - walk = (dcoords[:, :2]**2).sum(1)**0.5 # euclidean distance on xy - wait = dcoords[:, 2] - walk - - is_feasible = (wait > 0) & ((walk+wait) <= maxdist) - connectors = connectors[is_feasible] - - return connectors +@dataclass +class TransferConnector: + """ Manages transfer connectors. """ + coords: np.array + ods: np.array + # route_id: np.array + # service_id: np.array + + @cached_property + def ocoords(self) -> np.array: + """Origin coordinates. + + Returns: + np.array: x, y, z + """ + return self.coords[self.ods[:, 0]] + + @cached_property + def dcoords(self) -> np.array: + """Destination coordinates. + + Returns: + np.array: x, y, z + """ + return self.coords[self.ods[:, 1]] + + @cached_property + def walk(self) -> np.array: + """Walk distance (euclidean). + + Returns: + np.array: Distance from origin to destination point (on the xy axis). + """ + walk = ((self.dcoords[:, :2]-self.ocoords[:, :2])**2).sum(1)**0.5 + return walk + + @cached_property + def wait(self) -> np.array: + """Wait distance. It is calculated as the difference between timestamps (dz) + and the distance required to walk to the destination. + + Returns: + np.array: Wait distance. + """ + wait = self.dcoords[:, 2] - self.ocoords[:, 2] - self.walk + return wait + + def filter(self, cond: np.array[bool]) -> TransferConnector: + """Filter (in-place) Connnectors' origin-destination data based on a set of conditions. + + Args: + cond np.array[bool]: The boolean condition filter to use. + + Returns: + TransferConnector: Filtered Connectors object. 
+ """ + self.ods = self.ods[cond] + self.ocoords = self.ocoords[cond] + self.dcoords = self.dcoords[cond] + self.walk = self.walk[cond] + self.wait = self.wait[cond] + # self.route_id = self.route_id[cond] + # self.service_id = self.service_id[cond] + + return self + + def filter_feasible_transfer(self, maxdist): + is_feasible = (self.wait > 0) & ((self.walk+self.wait) <= maxdist) + return self.filter(is_feasible) + + def filter_max_walk(self, max_walk): + pass + + def filter_max_wait(self, max_wait): + pass + + def filter_same_route(self): + pass def get_transfer_connectors(data: GTFSData, config: Config): diff --git a/tests/test_connectors.py b/tests/test_connectors.py index 386977a..da01308 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -34,7 +34,7 @@ def test_query_all_valid_included(points, source): source_idx = find_index(points, *source) maxdist = 10 radius = maxdist * (2**0.5) - ods = connectors.query_pairs_filter(points, radius) + ods = connectors.query_pairs(points, radius) is_valid = get_valid_points(points, source_idx, maxdist) ds = ods[ods[:, 0] == source_idx, 1] @@ -46,9 +46,16 @@ def test_query_all_included_valid(points, source): """ All results from the query are valid """ source_idx = find_index(points, *source) maxdist = 10 - - ods = connectors.query_pairs_filter(points, maxdist) + radius = maxdist * (2**0.5) + + ods = connectors.query_pairs(points, radius) + tc = connectors.TransferConnector(points, ods).\ + filter_feasible_transfer(maxdist) + ods_filtered = tc.ods + is_valid = get_valid_points(points, source_idx, maxdist) - ds = ods[ods[:, 0] == source_idx, 1] - assert all(is_valid[ds]) + ds = ods_filtered[ods_filtered[:, 0] == source_idx, 1] + + assert is_valid[ds].sum() == is_valid.sum() + assert len(is_valid[ds]) > 0 and all(is_valid[ds]) From 665ee68549945f1407073878bb194b9892c0ce59 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 8 Dec 2023 18:11:43 +0000 Subject: [PATCH 09/30] transfer filtering 
--- gtfs_skims/connectors.py | 136 +++++++++++++++++++++++++------ tests/test_connectors.py | 85 ++++++++++++++++--- tests/test_data/config_demo.yaml | 2 +- 3 files changed, 183 insertions(+), 40 deletions(-) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index a4e4afc..44a160b 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -30,13 +30,13 @@ def query_pairs(coords: np.array, radius: float) -> np.array: return ids[connectors] -@dataclass -class TransferConnector: +class TransferConnectors: """ Manages transfer connectors. """ - coords: np.array - ods: np.array - # route_id: np.array - # service_id: np.array + + def __init__(self, coords: np.array, max_tranfer_distance: float) -> None: + self.coords = coords + radius = max_tranfer_distance * (2**0.5) + self.ods = query_pairs(coords, radius=radius) @cached_property def ocoords(self) -> np.array: @@ -77,41 +77,123 @@ def wait(self) -> np.array: wait = self.dcoords[:, 2] - self.ocoords[:, 2] - self.walk return wait - def filter(self, cond: np.array[bool]) -> TransferConnector: + def filter(self, cond: np.array[bool]) -> None: """Filter (in-place) Connnectors' origin-destination data based on a set of conditions. Args: cond np.array[bool]: The boolean condition filter to use. - - Returns: - TransferConnector: Filtered Connectors object. 
""" - self.ods = self.ods[cond] - self.ocoords = self.ocoords[cond] - self.dcoords = self.dcoords[cond] - self.walk = self.walk[cond] - self.wait = self.wait[cond] - # self.route_id = self.route_id[cond] - # self.service_id = self.service_id[cond] + ods = self.ods + ocoords = self.ocoords + dcoords = self.dcoords + walk = self.walk + wait = self.wait + + self.ods = ods[cond] + self.ocoords = ocoords[cond] + self.dcoords = dcoords[cond] + self.walk = walk[cond] + self.wait = wait[cond] return self - def filter_feasible_transfer(self, maxdist): + def filter_feasible_transfer(self, maxdist: float) -> None: + """Remove any connections with insufficient transfer time. + + + Args: + maxdist (float): Maximum transfer distance (walk+wait) + """ is_feasible = (self.wait > 0) & ((self.walk+self.wait) <= maxdist) - return self.filter(is_feasible) + self.filter(is_feasible) + + def filter_max_walk(self, max_walk: float) -> None: + """Remove any connections beyond a walk-distance threshold. - def filter_max_walk(self, max_walk): - pass + Args: + max_walk (float): Max walk distance + """ + cond = (self.walk <= max_walk) + self.filter(cond) - def filter_max_wait(self, max_wait): - pass + def filter_max_wait(self, max_wait: float) -> None: + """Remove any connections beyond a wait distance threshold. + + Args: + max_wait (float): Maximum stop (leg) wait time. + """ + self.filter(self.wait <= max_wait) - def filter_same_route(self): - pass + def filter_same_route(self, routes: np.array) -> None: + """Remove connections between services of the same route. + Args: + routes (np.array): Route IDs array. Its indexing matches the self.coords table. 
+ """ + self.filter( + routes[self.ods[:, 0]] != routes[self.ods[:, 1]] + ) -def get_transfer_connectors(data: GTFSData, config: Config): - pass + def filter_nearest_service(self, services: np.array) -> None: + """If a service can be accessed from a origin through multiple stops, + then only keep the most efficient transfer for that connection. + + Args: + services (np.array): Service IDs array. Its indexing must match the self.coords table. + """ + services_d = services[self.ods[:, 1]] # destination service + + # sort by trasfer distance + transfer = self.wait + self.walk + idx_sorted = transfer.argsort() + + # create origin-service combinations + order_o = int(np.floor(np.log10(services.max()))+1) + comb = (self.ods[:, 0]+1) * 10**order_o + services_d + + # get first instance of each origin-service combination + # (which corresponds to the most efficient transfer) + keep = idx_sorted[np.unique(comb[idx_sorted], return_index=True)[1]] + cond = np.isin(np.arange(len(comb)), keep) + + self.filter(cond) + + +def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: + time_to_distance = config.walk_speed/3.6 # km/hr to meters + max_tranfer_distance = config.max_transfer_time * time_to_distance + max_wait_distance = config.max_wait * time_to_distance + + # get candidate connectors + coords = data.stop_times[['x', 'y', 'departure_s']] + tc = TransferConnectors(coords, max_tranfer_distance) + + # apply narrower filters + tc.filter_feasible_transfer(max_tranfer_distance) + + if config.walk_distance_threshold < max_tranfer_distance: + tc.filter_max_walk() + + if max_wait_distance < max_tranfer_distance: + tc.filter_max_wait() + + routes = data.stop_times['trip_id'].map( + data.trips.set_index('trip_id')['route_id'] + ) + tc.filter_same_route(routes) + + services = data.stop_times['trip_id'].map( + data.trips.set_index('trip_id')['service_id'] + ) + tc.filter_nearest_service(services) + + arr = np.array([ + tc.ods[:, 0], # origin index + tc.ods[:, 1], # 
destination index + tc.walk, # walk distance (meters) + tc.wait/time_to_distance*3600 # wait time (seconds???) + ]) + return arr def get_access_connectors(data: GTFSData, config: Config): diff --git a/tests/test_connectors.py b/tests/test_connectors.py index da01308..ffdfff9 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -1,3 +1,4 @@ +from collections import defaultdict import itertools import numpy as np @@ -13,6 +14,11 @@ def points(): return coords +@pytest.fixture() +def transfer_connectors(points): + return connectors.TransferConnectors(points, 10) + + def find_index(coords, x, y, z): idx = np.where(np.all(coords == np.array([x, y, z]), axis=1))[0][0] return idx @@ -33,12 +39,13 @@ def test_query_all_valid_included(points, source): """ All valid points are included in the query results """ source_idx = find_index(points, *source) maxdist = 10 + is_valid = get_valid_points(points, source_idx, maxdist) + radius = maxdist * (2**0.5) ods = connectors.query_pairs(points, radius) - is_valid = get_valid_points(points, source_idx, maxdist) - ds = ods[ods[:, 0] == source_idx, 1] - assert is_valid[ds].sum() == is_valid.sum() + dest = ods[ods[:, 0] == source_idx, 1] + assert is_valid[dest].sum() == is_valid.sum() @pytest.mark.parametrize('source', [(0, 0, 0), (2.5, 2.5, 2.5), (-2.5, 0, 2.5)]) @@ -46,16 +53,70 @@ def test_query_all_included_valid(points, source): """ All results from the query are valid """ source_idx = find_index(points, *source) maxdist = 10 - radius = maxdist * (2**0.5) + is_valid = get_valid_points(points, source_idx, maxdist) - ods = connectors.query_pairs(points, radius) - tc = connectors.TransferConnector(points, ods).\ - filter_feasible_transfer(maxdist) - ods_filtered = tc.ods + tc = connectors.TransferConnectors(points, maxdist) + tc.filter_feasible_transfer(maxdist) + dest = tc.ods[tc.ods[:, 0] == source_idx, 1] - is_valid = get_valid_points(points, source_idx, maxdist) + assert is_valid[dest].sum() == is_valid.sum() + 
assert len(is_valid[dest]) > 0 and all(is_valid[dest]) + + +def test_filter_transfer_walk(transfer_connectors): + max_walk = 5 + assert transfer_connectors.walk.max() > max_walk + transfer_connectors.filter_max_walk(max_walk) + assert transfer_connectors.walk.max() <= max_walk + + +def test_filter_transfer_wait(transfer_connectors): + max_wait = 5 + assert transfer_connectors.wait.max() > max_wait + transfer_connectors.filter_max_wait(max_wait) + assert transfer_connectors.wait.max() <= max_wait + + +def test_filter_same_route(transfer_connectors): + # assume all even-to-even point ID are in the same route + routes = np.arange(len(transfer_connectors.coords)) + routes = np.where(routes % 2, -1, routes) + transfer_connectors.filter_same_route(routes) + assert (transfer_connectors.ods % 2).prod(1).sum() == 0 + + +def get_o_service_transfers(conn, services_d): + transfer_times = conn.wait + conn.walk + d = defaultdict(list) + for i in range(len(services_d)): + d[(conn.ods[i, 0], services_d[i]) + ].append(transfer_times[i]) + return d + + +def test_filter_nearest_service(transfer_connectors): + np.random.seed(0) + services = np.random.randint( + 0, 2, size=transfer_connectors.coords.shape[0]) + services_d = services[transfer_connectors.ods[:, 1]] + + # for every origin-service pair there are multiple connections + transfer_times = transfer_connectors.wait + transfer_connectors.walk + d_before = get_o_service_transfers(transfer_connectors, services_d) + + assert max(map(len, d_before.values())) > 0 + + # after filtering, there is only one and it is the + # one with the minumum transfer time. 
+ transfer_connectors.filter_nearest_service(services) + services_d = services[transfer_connectors.ods[:, 1]] + + d_after = get_o_service_transfers(transfer_connectors, services_d) - ds = ods_filtered[ods_filtered[:, 0] == source_idx, 1] + # didn't lose any origin-service pairs + assert len(d_before) == len(d_after) + # single connection per origin-service + assert max(map(len, d_after.values())) == 1 - assert is_valid[ds].sum() == is_valid.sum() - assert len(is_valid[ds]) > 0 and all(is_valid[ds]) + for o, service in d_before.keys(): + d_after[(o, service)][0] == min(d_before[(o, service)]) diff --git a/tests/test_data/config_demo.yaml b/tests/test_data/config_demo.yaml index e5b6bdf..5fdc0ab 100644 --- a/tests/test_data/config_demo.yaml +++ b/tests/test_data/config_demo.yaml @@ -13,7 +13,7 @@ settings: crows_fly_factor : 1.3 # Conversion factor from euclidean to routed distances max_transfer_time : 1800 # Max combined time of walking and waiting (sec) k : 500 # max nearest neighbours when calculating distances - max_wait : 1800 # sec | Max wait time at a stop + max_wait : 1800 # sec | Max wait time at a stop / leg bounding_box : null epsg_centroids: 27700 # coordinate system of the centroids file. Needs to be Cartesian and in meters. 
From 8eb8eee2c0b29bf226cf5027916105d59b6f1cee Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 8 Dec 2023 18:37:50 +0000 Subject: [PATCH 10/30] handle transfers --- gtfs_skims/connectors.py | 37 +++++++++++++++++++++---------------- tests/test_connectors.py | 13 +++++++++++-- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index 44a160b..48477b7 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -10,7 +10,7 @@ from gtfs_skims.utils import Config, GTFSData, get_logger -def query_pairs(coords: np.array, radius: float) -> np.array: +def query_pairs(coords: np.ndarray, radius: float) -> np.array: """Get origin-destination pairs between points, within a radius. The connections are forward-looking in z: ie the destination point has always greater z coordinate than the origin point. @@ -33,7 +33,7 @@ def query_pairs(coords: np.array, radius: float) -> np.array: class TransferConnectors: """ Manages transfer connectors. """ - def __init__(self, coords: np.array, max_tranfer_distance: float) -> None: + def __init__(self, coords: np.ndarray, max_tranfer_distance: float) -> None: self.coords = coords radius = max_tranfer_distance * (2**0.5) self.ods = query_pairs(coords, radius=radius) @@ -77,7 +77,7 @@ def wait(self) -> np.array: wait = self.dcoords[:, 2] - self.ocoords[:, 2] - self.walk return wait - def filter(self, cond: np.array[bool]) -> None: + def filter(self, cond: np.ndarray[bool]) -> None: """Filter (in-place) Connnectors' origin-destination data based on a set of conditions. Args: @@ -124,7 +124,7 @@ def filter_max_wait(self, max_wait: float) -> None: """ self.filter(self.wait <= max_wait) - def filter_same_route(self, routes: np.array) -> None: + def filter_same_route(self, routes: np.ndarray) -> None: """Remove connections between services of the same route. 
Args: @@ -134,7 +134,7 @@ def filter_same_route(self, routes: np.array) -> None: routes[self.ods[:, 0]] != routes[self.ods[:, 1]] ) - def filter_nearest_service(self, services: np.array) -> None: + def filter_nearest_service(self, services: np.ndarray) -> None: """If a service can be accessed from a origin through multiple stops, then only keep the most efficient transfer for that connection. @@ -165,34 +165,39 @@ def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: max_wait_distance = config.max_wait * time_to_distance # get candidate connectors - coords = data.stop_times[['x', 'y', 'departure_s']] + coords = data.stop_times[['x', 'y', 'departure_s']].values tc = TransferConnectors(coords, max_tranfer_distance) # apply narrower filters + # enough time to make transfer tc.filter_feasible_transfer(max_tranfer_distance) + # maximum walk if config.walk_distance_threshold < max_tranfer_distance: - tc.filter_max_walk() + tc.filter_max_walk(config.walk_distance_threshold) + # maximum wait if max_wait_distance < max_tranfer_distance: - tc.filter_max_wait() + tc.filter_max_wait(max_wait_distance) + # not same route routes = data.stop_times['trip_id'].map( data.trips.set_index('trip_id')['route_id'] - ) + ).values tc.filter_same_route(routes) + # most efficient transfer to service services = data.stop_times['trip_id'].map( data.trips.set_index('trip_id')['service_id'] - ) + ).values tc.filter_nearest_service(services) - arr = np.array([ - tc.ods[:, 0], # origin index - tc.ods[:, 1], # destination index - tc.walk, # walk distance (meters) - tc.wait/time_to_distance*3600 # wait time (seconds???) 
- ]) + # construct array + arr = np.concatenate([ + tc.ods, # origin and destination index + (tc.walk/time_to_distance).reshape(-1, 1), # walk time (seconds) + (tc.wait/time_to_distance).reshape(-1, 1) # wait time (seconds) + ], axis=1).round(1).astype(np.uint32) return arr diff --git a/tests/test_connectors.py b/tests/test_connectors.py index ffdfff9..736f914 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -1,10 +1,11 @@ from collections import defaultdict import itertools +import os import numpy as np import pytest -from gtfs_skims import connectors +from gtfs_skims import connectors, preprocessing @pytest.fixture() @@ -101,7 +102,6 @@ def test_filter_nearest_service(transfer_connectors): services_d = services[transfer_connectors.ods[:, 1]] # for every origin-service pair there are multiple connections - transfer_times = transfer_connectors.wait + transfer_connectors.walk d_before = get_o_service_transfers(transfer_connectors, services_d) assert max(map(len, d_before.values())) > 0 @@ -120,3 +120,12 @@ def test_filter_nearest_service(transfer_connectors): for o, service in d_before.keys(): d_after[(o, service)][0] == min(d_before[(o, service)]) + + +def test_get_transfer_array(config, tmpdir): + path_outputs = os.path.join(tmpdir, 'outputs') + config.path_outputs = path_outputs + gtfs_data = preprocessing.main(config) + arr = connectors.get_transfer_connectors(gtfs_data, config) + assert len(arr) > 0 + assert isinstance(arr, np.ndarray) \ No newline at end of file From 47bc57c36094f196029a427508dd4370543434e8 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Mon, 11 Dec 2023 14:23:29 +0000 Subject: [PATCH 11/30] access egress --- .gitignore | 2 +- gtfs_skims/cli.py | 23 ++- gtfs_skims/connectors.py | 151 ++++++++++++++++-- gtfs_skims/preprocessing.py | 2 +- gtfs_skims/utils.py | 1 - gtfs_skims/variables.py | 4 + tests/conftest.py | 8 +- tests/test_cli.py | 1 - tests/test_connectors.py | 46 +++++- 
tests/test_data/config_demo.yaml | 1 - tests/test_data/outputs/calendar.parquet.gzip | Bin 0 -> 3098 bytes tests/test_data/outputs/routes.parquet.gzip | Bin 0 -> 2431 bytes .../test_data/outputs/stop_times.parquet.gzip | Bin 0 -> 8296 bytes tests/test_data/outputs/stops.parquet.gzip | Bin 0 -> 8421 bytes tests/test_data/outputs/trips.parquet.gzip | Bin 0 -> 1475 bytes 15 files changed, 213 insertions(+), 26 deletions(-) create mode 100644 tests/test_data/outputs/calendar.parquet.gzip create mode 100644 tests/test_data/outputs/routes.parquet.gzip create mode 100644 tests/test_data/outputs/stop_times.parquet.gzip create mode 100644 tests/test_data/outputs/stops.parquet.gzip create mode 100644 tests/test_data/outputs/trips.parquet.gzip diff --git a/.gitignore b/.gitignore index fb87f1a..5f9b874 100755 --- a/.gitignore +++ b/.gitignore @@ -40,4 +40,4 @@ mike-*.yml .ipynb_checkpoints sandbox.py -tests/test_data/outputs/ \ No newline at end of file +tests/test_data/outputs/*log \ No newline at end of file diff --git a/gtfs_skims/cli.py b/gtfs_skims/cli.py index 0707407..328bd9e 100755 --- a/gtfs_skims/cli.py +++ b/gtfs_skims/cli.py @@ -3,12 +3,29 @@ import click +from gtfs_skims.preprocessing import main as main_preprocessing +from gtfs_skims.connectors import main as main_connectors +from gtfs_skims.utils import Config + + @click.version_option(package_name="gtfs_skims") -@click.command() +@click.group def cli(args=None): """Console script for gtfs_skims.""" click.echo( - "Replace this message by putting your code into gtfs_skims.cli.cli" + "Console script for Argo (gtfs_skims)." 
) - click.echo("See click documentation at https://click.palletsprojects.com/") return 0 + + +@cli.command() +@click.argument('config_path') +def run(config_path: str): + config = Config.from_yaml(config_path) + steps = config.steps + + if 'preprocessing' in steps: + main_preprocessing(config=config) + + if 'connectors' in steps: + main_connectors(config=config) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index 48477b7..8d15bfd 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -6,8 +6,10 @@ import numpy as np from scipy.spatial import KDTree +import pandas as pd from gtfs_skims.utils import Config, GTFSData, get_logger +from gtfs_skims.variables import DATA_TYPE def query_pairs(coords: np.ndarray, radius: float) -> np.array: @@ -159,6 +161,74 @@ def filter_nearest_service(self, services: np.ndarray) -> None: self.filter(cond) +def query_pairs_od( + coords_origins: np.ndarray, + coords_destinations: np.ndarray, + radius: float +) -> np.array: + """Get origin-destination pairs between points, within a radius. + + Args: + coords_origins (np.array): Coordinates of origin points + coords_destinations (np.array): Coordinates of destination points + radius (float): Maximum distance between points + + Returns: + np.array: Feasible connections between points. 
+ """ + tree_origins = KDTree(coords_origins) + tree_destinations = KDTree(coords_destinations) + + ods = tree_origins.query_ball_tree( + tree_destinations, r=radius) + + # flatten + ods = np.column_stack([ + np.repeat(range(len(coords_origins)), list(map(len, ods))), + np.concatenate(ods) + ]).astype(DATA_TYPE) + + return ods + + +class AccessEgressConnectors(TransferConnectors): + """ Connections between zones/endpoints and stops """ + + def __init__( + self, + coords_origins: np.ndarray, + coords_destinations: np.ndarray, + max_tranfer_distance: float + ) -> None: + self.coords_origins = coords_origins + self.coords_destinations = coords_destinations + + radius = max_tranfer_distance + if coords_origins.shape[1] == 3: + radius += max_tranfer_distance * (2**0.5) + + self.ods = query_pairs_od(coords_origins, coords_destinations, + radius=radius) + + @cached_property + def ocoords(self) -> np.array: + """Origin coordinates. + + Returns: + np.array: x, y (, z) + """ + return self.coords_origins[self.ods[:, 0]] + + @cached_property + def dcoords(self) -> np.array: + """Destination coordinates. 
+ + Returns: + np.array: x, y (,z) + """ + return self.coords_destinations[self.ods[:, 1]] + + def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: time_to_distance = config.walk_speed/3.6 # km/hr to meters max_tranfer_distance = config.max_transfer_time * time_to_distance @@ -168,7 +238,7 @@ def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: coords = data.stop_times[['x', 'y', 'departure_s']].values tc = TransferConnectors(coords, max_tranfer_distance) - # apply narrower filters + # apply more narrow filters: # enough time to make transfer tc.filter_feasible_transfer(max_tranfer_distance) @@ -197,30 +267,89 @@ def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: tc.ods, # origin and destination index (tc.walk/time_to_distance).reshape(-1, 1), # walk time (seconds) (tc.wait/time_to_distance).reshape(-1, 1) # wait time (seconds) - ], axis=1).round(1).astype(np.uint32) + ], axis=1).round(1).astype(DATA_TYPE) + return arr -def get_access_connectors(data: GTFSData, config: Config): - # ... query ball tree - pass +def get_access_connectors(data: GTFSData, config: Config, coords_origins: np.ndarray): + time_to_distance = config.walk_speed/3.6 # km/hr to meters + max_tranfer_distance = config.max_transfer_time * time_to_distance + max_wait_distance = config.max_wait * time_to_distance + # get candidate connectors + coords_stops = data.stop_times[['x', 'y', 'departure_s']].values + ac = AccessEgressConnectors( + coords_origins, coords_stops, max_tranfer_distance) -def get_egress_connectors(data: GTFSData, config: Config): - # ... 
query ball tree - pass + # more narrow filtering + ac.filter_feasible_transfer(max_tranfer_distance) + if config.walk_distance_threshold < max_tranfer_distance: + ac.filter_max_walk(config.walk_distance_threshold) + if max_wait_distance < max_tranfer_distance: + ac.filter_max_wait(max_wait_distance) + arr = np.concatenate([ + ac.ods, # origin and destination index + (ac.walk/time_to_distance).reshape(-1, 1), # walk time (seconds) + (ac.wait/time_to_distance).reshape(-1, 1) # wait time (seconds) + ], axis=1).round(1).astype(DATA_TYPE) -def main(data: GTFSData, config: Config): + return arr + + +def get_egress_connectors(data: GTFSData, config: Config, coords_destinations: np.ndarray): + time_to_distance = config.walk_speed/3.6 # km/hr to meters + + # get candidate connectors + coords_stops = data.stop_times[['x', 'y']].values + ec = AccessEgressConnectors( + coords_stops, coords_destinations, config.walk_distance_threshold) + + arr = np.concatenate([ + ec.ods, # origin and destination index + (ec.walk/time_to_distance).reshape(-1, 1), # walk time (seconds) + np.array([0]*len(ec.ods)).reshape(-1, 1) # wait time = 0 + ], axis=1).round(1).astype(DATA_TYPE) + + return arr + + +def main(data: GTFSData, config: Config) -> tuple[TransferConnectors, AccessEgressConnectors, AccessEgressConnectors]: logger = get_logger(os.path.join( config.path_outputs, 'log_connectors.log')) + coords_origins = pd.read_csv(config.path_origins) + coords_destinations = pd.read_csv(config.path_destinations) # get feasible connections logger.info('Getting transfer connectors...') transfer_connectors = get_transfer_connectors(data, config) logger.info('Getting access connectors...') - access_connectors = get_access_connectors(data, config) + access_connectors = get_access_connectors(data, config, coords_origins) logger.info('Getting egress connectors...') - egress_connectors = get_egress_connectors(data, config) + egress_connectors = get_egress_connectors( + data, config, coords_destinations) + + 
# convert to dataframe + colnames = ['onode', 'dnode', 'walk', 'wait'] + transfer_connectors = pd.DataFrame(transfer_connectors, columns=colnames) + access_connectors = pd.DataFrame(access_connectors, columns=colnames) + egress_connectors = pd.DataFrame(egress_connectors, columns=colnames) + + # offset IDs for endpoints + access_connectors['onode'] += (len(data.stop_times)+1) + egress_connectors['dnode'] += (len(data.stop_times)+len(coords_origins)+2) # save + logger.info(f'Saving connectors to f{config.path_outputs}...') + transfer_connectors.to_parquet( + os.path.join(config.path_outputs, 'connectors_transfer.parquet') + ) + access_connectors.to_parquet( + os.path.join(config.path_outputs, 'connectors_access.parquet') + ) + egress_connectors.to_parquet( + os.path.join(config.path_outputs, 'connectors_egress.parquet') + ) + + return transfer_connectors, access_connectors, egress_connectors diff --git a/gtfs_skims/preprocessing.py b/gtfs_skims/preprocessing.py index f637d06..419e82e 100644 --- a/gtfs_skims/preprocessing.py +++ b/gtfs_skims/preprocessing.py @@ -144,7 +144,7 @@ def main(config: Config) -> GTFSData: """Run the preprocessing pipeline and save resulting tables to disk. Args: - config (Config): Path Config object. + config (Config): Config object. Returns: GTFSData: Pre-processed GTFS data object. 
diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py index 802c6bf..3482a17 100644 --- a/gtfs_skims/utils.py +++ b/gtfs_skims/utils.py @@ -111,7 +111,6 @@ class Config: calendar_date: int crows_fly_factor: float max_transfer_time: int - k: int end_s: int bounding_box: dict epsg_centroids: int diff --git a/gtfs_skims/variables.py b/gtfs_skims/variables.py index f1e9591..c6ded4e 100644 --- a/gtfs_skims/variables.py +++ b/gtfs_skims/variables.py @@ -1,3 +1,7 @@ +import numpy as np + +DATA_TYPE = np.uint32 + # route types lookup # source: https://developers.google.com/transit/gtfs/reference#routestxt # and https://developers.google.com/transit/gtfs/reference/extended-route-types diff --git a/tests/conftest.py b/tests/conftest.py index 2022fb1..bc546af 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -31,6 +31,12 @@ def response(): def config(): return Config.from_yaml(os.path.join(TEST_DATA_DIR, 'config_demo.yaml')) + @pytest.fixture def gtfs_data(): - return GTFSData.from_gtfs(os.path.join(TEST_DATA_DIR, 'iow-bus-gtfs.zip')) \ No newline at end of file + return GTFSData.from_gtfs(os.path.join(TEST_DATA_DIR, 'iow-bus-gtfs.zip')) + + +@pytest.fixture +def gtfs_data_preprocessed(): + return GTFSData.from_parquet(os.path.join(TEST_DATA_DIR, 'outputs')) diff --git a/tests/test_cli.py b/tests/test_cli.py index cb08971..f5a59d0 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -10,7 +10,6 @@ def test_command_line_interface(): runner = CliRunner() result = runner.invoke(cli.cli) assert result.exit_code == 0 - assert "gtfs_skims.cli.cli" in result.output help_result = runner.invoke(cli.cli, ["--help"]) assert help_result.exit_code == 0 assert ( diff --git a/tests/test_connectors.py b/tests/test_connectors.py index 736f914..2922397 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -4,6 +4,7 @@ import numpy as np import pytest +import unittest.mock as mock from gtfs_skims import connectors, preprocessing @@ -122,10 +123,43 @@ def 
test_filter_nearest_service(transfer_connectors): d_after[(o, service)][0] == min(d_before[(o, service)]) -def test_get_transfer_array(config, tmpdir): - path_outputs = os.path.join(tmpdir, 'outputs') - config.path_outputs = path_outputs - gtfs_data = preprocessing.main(config) - arr = connectors.get_transfer_connectors(gtfs_data, config) +def test_get_transfer_array(gtfs_data_preprocessed, config): + arr = connectors.get_transfer_connectors(gtfs_data_preprocessed, config) assert len(arr) > 0 - assert isinstance(arr, np.ndarray) \ No newline at end of file + assert isinstance(arr, np.ndarray) + + +def test_get_od_pairs(): + ods = connectors.query_pairs_od( + np.array([[0, 0], [1, 1]]), + np.array([[0.5, 0.5], [2, 1], [2, 2]]), + radius=1 + ) + expected = np.array([ + [0, 0], + [1, 0], + [1, 1] + ]) + np.testing.assert_equal(ods, expected) + + +def test_get_od_walk(): + egress = connectors.AccessEgressConnectors( + np.array([[0, 0], [1, 1]]), + np.array([[0.5, 0.5], [2, 1], [2, 2]]), + max_tranfer_distance=1 + ) + walk = egress.walk + expected = np.array([ + (2*0.5**2)**0.5, (2*0.5**2)**0.5, 1 + ]) + np.testing.assert_almost_equal(walk, expected) + + +def test_convert_distance_3d(): + egress = connectors.AccessEgressConnectors( + np.array([[0, 0, 0]]), + np.array([[1, 1, 1]]), + max_tranfer_distance=1 + ) + assert len(egress.ods) == 1 # radius has been adjusted to 3D space diff --git a/tests/test_data/config_demo.yaml b/tests/test_data/config_demo.yaml index 5fdc0ab..4196b30 100644 --- a/tests/test_data/config_demo.yaml +++ b/tests/test_data/config_demo.yaml @@ -12,7 +12,6 @@ settings: walk_speed : 4.5 # kph | Walking speed crows_fly_factor : 1.3 # Conversion factor from euclidean to routed distances max_transfer_time : 1800 # Max combined time of walking and waiting (sec) - k : 500 # max nearest neighbours when calculating distances max_wait : 1800 # sec | Max wait time at a stop / leg bounding_box : null epsg_centroids: 27700 # coordinate system of the centroids 
file. Needs to be Cartesian and in meters. diff --git a/tests/test_data/outputs/calendar.parquet.gzip b/tests/test_data/outputs/calendar.parquet.gzip new file mode 100644 index 0000000000000000000000000000000000000000..f760856622100722a9d0176c217c983ebdc2fdde GIT binary patch literal 3098 zcmcJS!E4h{9LHa~F3UPZ5ff4^8geKTy3%e#hNvit4Dm2_@Suz?ZCP7Hx5Qcb|2XC7q!obUz7jI649R+Fp*n~AYe~ zy$JG)=G+Fa6(`?*Edp@%>C4m9VlqNyT%Y@hRd2pw(a$eK7kyDOvoXmz)p=;Df(V3^ z&`JnNsb9A=xJ+-;4z!o_DlF?7o#V{9rKwGNKY(25uoHt%pk{@qcr@?Yp&+hi>#T@( zROH(14ClBZpR|Ki8f&^1CB5Ahpa@5hWl*&X#%(&hpNSyc?7%^iiXM+wY2BoqVZvFA~;yJP;V zl=ur|)RV`~K#&JrxrDKR89M`*@$B;iVFaTvLwr9da(K~7TtmhC!;yXutti#H;hI+2 z-@MRKuF8*Al$s6=t!$|##P^nK7=A!M`sJMpy zDSC()DCKoS*UxNTl;r|N$18Fz(m;e9( literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/routes.parquet.gzip b/tests/test_data/outputs/routes.parquet.gzip new file mode 100644 index 0000000000000000000000000000000000000000..a5b796d61c9c1693d31c481f9d1d4be7d34aff6a GIT binary patch literal 2431 zcmWG=3^EjD5X}*d(h=nXGT1~p7#QTcfdmNr4?Vh*lYxQbz=0nP5B@RVUh;94cLMX$ z>D&45GnQ@=^)Lgf0qah}s@wGFMqUO67M26`OEs@1arW+1{j|=K`2o+PC3<(OzC0=w(-_1Pz`zjQZ+1WrVtI&Y1b(eO|*46v?pBC=cyru6Z z1qukyRtXBoar$THbs~k=s$$RMbH`?!e!1>G>ymZ-EN1=>gqEFP5v{Qd`otu^XmynS zx{8oJ&z%qEXVwI2UJTwUmHJ?g)53j6{S%Mn9(Ys~e37e|y}r{U_kC#L%x#6mcesws ze4MZm;<`P|q9?GsF4T7~7n32+x<0*qhdMVm&n{*y6=2UV;xW1LIEyJWd*^m@L0LWL zA9a(L7Uyk^c)jmX$K7?Wl`qU-yS1w1Nn7OBVo@f}t~?!8v$VtU<%>7|7Fko-sL9Bx z{iCtSal@3E)x4GKGzGf7_$P>87ZRPvQk)YI@a)Lpk`0p#`pb4LGxyT-`F7&;ho1UN z?$g>d&;EL6F2DC@;>3HfluOG&MOb2*CgOG{GYGgBBuc~qGsI1|%T^O7rp zd{tmt1c?Yh6clIV7nQ{4CFZ8WB={i`Ir(|%DB@fY@s!l!WROy*wNTNL%7Rn|F)m3N z$r?2d8BrEVQ3gqxWxJx#cCuO#5imuIM{*S2LTCa?qOgMV_=Y!0cH>eQ6`|c zS_>o00JtY$X0$K@4QXKnn$pGyHUwx&0+fYjN(YmSC=b{f2)~1ksS_&$8q&dJ0}M=L zYyZJDU<3_VUzu1HP~SfceLt9C7NLh6SX&3P*d8XJ#vjb+7H(mKX+#QIu%1WEV(&nD zwy>e=S;Pqo8>D~->j8!^$X|;%fq}ye2^(PGz*wk(Bi6wq#s&1e1Y1F3UP@xI#AA+X zrR4ma(%igwa573QRhG!>jP%TkMqGxPI6GDZeQ#(IVZdWN7R1IbeH 
zNZAsa$v{R!voo?@JwrWXP%r|GF3Btbnux45ww6Ic56NQPg36MN{5%Dy0Sa)N6*Q7c TGjmcD3^W;DvoSCP0GnO_oqf%} literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/stop_times.parquet.gzip b/tests/test_data/outputs/stop_times.parquet.gzip new file mode 100644 index 0000000000000000000000000000000000000000..29ecd67f7f10a602c6edc3eeb7076283075ac3df GIT binary patch literal 8296 zcmeHNc{r5qzqb_%lVungONfeSvJD2U6tbi&uXQX*c4He`MOj138)BjeEwZ%8GK^&y zy;+K~WE<<)2ZNcho{?xdz5RaYyyv>kALl%mYp#3l`}_HP?)&?E?(cJ7&oDT876POL zriufbnWcdhOtkiPAPXIxa18BZ{aRC~IiAQ%YgDik&*NM=Er=HcRUA7DU2|PKeG69& zBHWYT&9Uy-F*ere>|#&>jVCkd&xz|V@aWIW>yz1<=A@ezz)kb8CNhKUoT%&q7qcq% zAqS#J3_Q9|EAUAove?8w(**gt&Hs5Dvdj)V4#lZfM(KIIFhY&VXRB7csxkF{jii{?t*k)Jf&k zabzlSo7KoktBJiFzLCcThds2Q&4zIcwdGfpkT2i(KYN2L_4Cj6L%t#V=aG>Z0ObLI znjcSj98WFMr##Z9zG|Z6G*QvAlyq6@i#*EHJZgzMCCi;!NTlQvspZ0yhr-l?G|H1S zYOysX)0+Ca7g}2ohd^_qM#LpmYo6#KO5{+Z(vGUQ;zmUIPKyyyzEW(a45C=dV)TGX zY3=J=M43R>h=fvU&2u+IscP4#j9)2EU~yaAlrS{p2%bq&kfv=>lM#l5WJBz6uCMAD;%JqCcZb96xRPwgL>7j*A$0e$-C~U8FLIN@ba|Q zNR8tXRn0uhAn;w2cr7%4Je#TqhMAm!(_Zhv&h%_hVB zxh-(wE?~AXGeAqlxKi*LKJKo;D!OBw?L`YI?ykL6EbA0W2RGIrWqi9>a=wu#M_IBJ zB`Eba=(tcEvp#dDF}QO-s90N>H?S`_Hcf*NM7ZQZFs1#WT!p5q+Hb9y5*{ElgCv3= zf1=+jYxFrN1WW-6hhx#lsbJV>F<|yq*z?9b$zTBm$FMdOdU)YwTP<^*S2S)F{LRQ}-dw9Z#u`blMG7H&`v}b@xY+Gv`V= zUPBv^SAu2Z^k?IOtF^p%({#r13p%qFua(S94wiK!k51idV(cPBffquA%H7mG zR&xkGvYAlF?p!%%s*>5LptX+((p=#n^@^sVF^A-x^04KeWQn%2+xG*8!qK}2D_iG= z8#`O&k!O3w0fkv_Ge;Y&lsl3ojLuv$y`#d(K>!2R8q<(c&KL zde&Niq3}Y9>W{H4`ml1ZK!T3O7>ZRBV=xyP9~=!!k~1eW9908MS%KBIQ8T9T8QCs? 
zse1x=e>BT2L z#4pTy*WaNk)#+J5E8V6;7n1qc+7`*evuB!j$FpzjWkq2A|B~qbiMsz} z$~MHV|KDn~UHHGS)$YIP;~a!FI91;Y40C)Zs0|)gNVPK+$;rx7f2KGg{AJ-D3X3&D^w4V6g-GlQ51IELbA}}0W z`D^N&`lHL!j={_B0V~rfH_s3pM=03SZ~JasPCeH_ZMfV!1h1b1!5G$@@4|%MR z7M*fYR&Z6Uk-qWAnF8pD8D^EP&d`nZeTPBG;vEj3)VLcaC$b!24 zu-_FY=)HyakIZ0WC)-HTq?Dr5Tqh@vd`Q-nV4%F8kxq{ijhr`?-5xFN*3Z&ozpwed z0ciGMYo1;#@2s3nn4KBUxOhFR7Ls5>>E&5i${5W3W35 zMBrHDn4MV4o*l$D_(qhcy-6rBtFsfCVy#64nz%hw`S7rZ0aVf=>M-?w57sl7++FMe z;efUsck%Cr$U%4t{}leGW)Ss8Tu^eeuzQ!?VtMlVptRaTZTVCeHy5`S&Lz zOWwx{&7})7g0Q>7A)$sh;9%|~EKe3B%&ZV++}`{9K5Ve5DD)x4F#z&3^UyWO>R64#v$mX>w(x z-^4bH9MDwZ8nrzMcW(|gv?_`mNw;yhb0n~AxAW+Wqf(B__jM*gh%)4!*X;WlV)yp& zoRoUVe@;A-^Yp7oWzmr}H6?(*o4OLAA4MP!kfjfS*u=IWueY(UrM+yA$w;czFzu0O z?JK#`9-%RPsT3M@SLp;_4~|v7hv!)VJ+WU7RRt96=X9y|Fp&4&f%Dk)()7dLRy!u# z^iCP}sBuqOl>)}g{RwWKOc7Q6Pt9%FRSLE#1jQ&zPYea$fQO_N@e6qFvhHKcMITI_ zp-Zh5s_)l31IGQ~<()22IdTl?dTHC89BBFp3)6d}rfbT$B9QwW}STqfEOTvH=dz-kT&P0;B# zoU_>_M&XD_sWI|rZZZ8JS~9J0A^)S4&E}odLm2xLFN!J54&KMA-F<1!7theRB5;zs&lMg zr7KL&1(#M-tOKsC%rBbFGL6-9(PZzn<*@c91;>M`m#sXx74dZCRZbS3I`RV{u`NZIjG!^VK{Ou#&KOcNIVe=cGn^l--k#6V-5zy3 z3A;b%k!qC(FjCL0tns!63tbnRd%NHPIf+Bl8q*llSk|=#lgFl!NgmbdcY&z)AM(no@ zC^zb;j=z|N+tYQ*-R?25#cbgMkH*0E1<5NkHy-`Ljo~lX-MD?Hf~P>L7JHf3a)IbD&UJ7xj|S+J)Xfgo#)wIzYRB%*WLJZ!;OD# zxY35@#?f^*N)WY7AERk*EDMM?DoR~<#-(w8axj1-)=x zJygARrU4)02`Ts4Zpfz&;^}UHmcQ?9s{ty7;%iH(9zAO$4MG=F^`U?~EO63FfkVSt zH>4vW?-0G1;XN(n&UhRFF3CIbjz?^!Zr70QD>%pE^q!UReX^6MH41I$GIH7?Z&@NN zc!Q#fi!fq|`%CXNgqM_ z!~N8N+a#>t$H3|FSJIX2ifZurVt zIi_tJtPFi~D0ac3mRY}k&8ZQEK|N5$Eckb?9YLYPLVB(X-bVwZRB$ z0mvtk)``f{`cAXi3(pQa`;p?zsd-5IcoX%p+mqd|$nSfkf`h$2NiH$lw>=1gNlR%y zCX+7n-|hZ0;-b6IZMhpn@S!FQHsB5xzBdk#bRsYytr)}YJd#*X37lxE2}yggM)5_) zqW`>^4d+vFxNt|;C>S4S8@gn2*OE+wk6BMxW^te11YPPD8FnXpGEP*09j z?XAuDP_1Hp1AJsHwWM)x-TaA;ApM?V+0w}4Q5bF|+hI$yrbW8)z1F8tf2sHQlU5@> z^=7DRT`GrNU)BGpvz&h?nrpFleKK08@$r8L2Pig-;+ z5j|*y8014QA9E{lI9IqFp*X8-*0odpxYFmtnPOoj9s-GJI*tj{rS1u0HP>u5gi~;& zYOQ0~hZBX1QmV_AbH9lwiXwrC4y#Nn{3I}WvdEp8Xl@_fQEu;~rE^;sDH4M;F-PjR 
zOe<2g&>6gJ<#~jt>?n6mfgD*84V|7?u|cJL=HvWiZWFGxV^S&wQAox9-iz5&(6MPw zoLA5JWo_YHiIgSp`^%2QWun6V(uCBy>Fn7EqwvfbOclrlYv_$X!-nH3H^TZohgOH& znILASUK)$h{=AkNh7qLb(z|@I3j=@ssqF|_S#^a2dFg;mz$^4X2GBMqH+u*Ft*^}9 zUQQm?PWE&_w!I7>Hn^9UlP}!G+S|$1VK423yxz%S@8ALV^7ip^_@SE_;qAVTTo3!o z&Hkke;o#}x;AZEr)y=xj=Hc$-=DpRm^BX5(tCMw8{aY9Px0%u->3A!c_`&>W2^KJr zi62PE|8+4yCfWdRtvK%@Gd~?K3k<~4LVMG?et}p7HyU)bSL*y=+RIluAOo#OB6N#l zGXYy9RrEBH&~+B?eH&m9%aFuI>n9d0D;UW3Wg%e5bpMkUDo50QFZy>H|@;+vzA!fyYMkLE@rs~Igv$fg>jO$7=wAE^S%@l}* z{%h-})s{duxxU@dEuJI>8czv?#p)?&sV$%Wd*}?}9C+{wLHnnZ8+PiwCvNGokkH@I;>4RfV+f|G-by|o+M)q(ci&Bw(>T13>bI)+ zuhsu^>_6B4TGD^N?C*m9hZHxT82^Uu_n|+k|JQxB`BeJ7?AF6;J?tM!e!FLg-;4ef z_zz__jruP{e+c`BlK#J6-=miYZY9l{#tLp$I3ut z F{0GlC@j(Cp literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/stops.parquet.gzip b/tests/test_data/outputs/stops.parquet.gzip new file mode 100644 index 0000000000000000000000000000000000000000..10a1d132786d559f46e915dc36d64b1d4769af4a GIT binary patch literal 8421 zcmb_ibyQSqyQe!8n4vp`p%EM!DG3ookcI(F$z{kZu@SQo1gp z$8$XEobTSd);E95+VA_kzvuV#n!WcZOFZTVAOH%80a2(Rz#h`|H5vc|0f9RB`s@4W zN@Tc2iyZ;s<$$WL@npgC{euEa@+zkB^4Ag`2UxF%($w5a4hhd`y~PcL?y(riGRM&h zH@y*V8^VE-w2(GEkl{|?4uMiE#y3haamU|Lgpv)#Hz}HMCxUyRRQsBZ_kqQ6j3Btk z%nGvT620g$p(vbJbOpP9iLHK_ydEx8zk=?$#O%3D>N}kwsG_naFPyilbn)m0&fzW zVx_84n!`4pO$SakTGjN>$~K^8YEd+-`Btm4KX=mY5M!cuq%9il+$orf#sqKTo1bKI zCzJLV13$*M=qz)mqJoTX2WvJzPMOzCS#;5xlh#{+>dmX^Ey8Bz9?mRS&dg6Q`cQ8n zynmNoIEspZa2?SZ5Fi)hpQEcAV_rbWeajE|0SyD*P;l%PVdF(uGvw?S5 zwMCvxhejS`BgmYsACXB`tdBXDo@P|G)Qj1c6Ma1w4d(9vs_rN=ltoCHD^nMeYrADr zt~_LTec3J}oPWk!`U+ZIPvFRxuFf@c!TK6MZe6;}mUtMSzbP|+b%Yg`?}Y3gXigNW z71Tmn#Jx=g&T3{gb0QT&UcXQp7x=c1;UhS3^r3>AiK{PS9< zef{pZt#RExfe+@u&7L#TUwZKbN=|&)8?0+8GUq?%45>g`&5ysmzQ2$KJ&<;agmWYpDZ{npY{qB*V4>}R_?+~kR(Eo2kd19*3$B4Z};$Z z@pbi}w8&RYv+|@SskqcI{}|qJe~KKyS(A|1O{hzTmO|=WU=P1Ncc*iAm)mia&H-c2 z-QaqyKZEJdBnI)seuE6zZZ|k-C3h;yy%$ z;3$O%X^p!Rdclt72Jua9vV{O2SUSbbSBv{f_ith zC(DG2xYE8+nLKW0v6l!8iR?7%G9S`lRjbLe8{BU$HIrzdGh*P$B;x@jc{eF=C0gaE 
zD!!&jezv!U7H=5Eg;HQ`)W!YqqnXYf=6ABLXK^Hohsm;qc?BH-Sb0jXKPz-kcuNM0 z$6JjkVdz2Y9(orurQ`!&@@zyr=y)=!0mLhl>T8{o@2bZieUfPvg~>iTo1m7yS-OVD zZk)f$?kIwz?W;TZ2vKuo*tpCn_06z|$06R}MLg3^u^_cmk*fo?T$ktpmrjHWU$bzv zu}h?C1p5+q;M9b-M6nnia%}qDn5DO-=Ixq6GLcW*3RGE8urom45=d*ci*6(9)#J!i zMt;jkMI#IM$t)i7$(MK=d7-=n$*>pb67+bg|XU8sZ zdB%1nlhmSn_!$xECh|J+VH(*IU1Vajkp1cs`**En9O}y#mjcWhA#-=7ETiK{bm`V7 z0*}b`tlsx__F4-`I@LVaB+#OV*Oe}`L+u$0be=4~2qPI{SR~i#UKq&k8F%neMAjcy z-dLlH$5?W~aN(kVUfjPgF!ei2Jso7)gS z)>ZAxEILz}5fsrW6*$%P(%d4ple0(%3GEYN8!n!YIr7m#oZ(Yoy-BgqIbr(xr30KT z(^ubsp6OCl`TiXSz~1;t+-RRlq}m*ascJun6>~Vr``bicTl}cPj7X{g)cAD2V02Kx zaFvx>Khnur%g0l;ZWgm_)B`(Wm2pMG9?bc;DheHb&oSDP@GTAfQfKESl)b-#w-u{7 zUL?@%>7Z%WvdT$kCVovQu1-k7J!3uB4-+$WhACgzyI3BwDxnj-l)UsHV`)vzttZdj znfa8#BXdGiple?OE9u=xkKoQU*nVu|1A6*w;2`*|r^-k{sV|O`zA0Xvyi2kLStpoz z!(ExyDd=tI+h(UVth6CYMFp~|X!XH+wiT&Q(0zH|m~eOu z0)SGjhwb2~tY#wLPdwPBoiB5@#HpRn(-KRCWYcE5nTjoNzqJ&!WvS|zOMm?Ev43z5 zIRJkePA`Mkux-LC0w*tR_|n_cRGl0!;8P$HbNE2fKsU|5HYRVWL^Fr~OT7FoxxE}` zs{6Qx2YP%JSWkv%jsOlTX40^vBGM>mR;pT*kve|x^C#yV1z#+d0S(ULL4#$2@*6{> zA49+Fu)rGGcW4G9scyFh$wB2jLQfoDtDw854dNziWv;lC=q~hfnZjBM--atIz3u3z z_ZIYiLEI<(ma@q)aF6R*GH`$m8L4-QR{sc`ONd-crqflsCB2!@Qi!fA{$X5Ea!A+F z78Y=Y{rM8ZL0@v?aMDp}&2I8hr0S%SNrxJS0rR`k@~Xq}n4yC>p-5LIkJBwX2BzrL z3bt%~$Y8y&Y&D2G{84|L93D2E*quXm|7fD5u%w-QfrFqbzxvP7H80_av-NBX)`krO z3Hjw)7MJ#*wed)TgSfJXLR3tc^SdIJ?~u>qmV$4GuE9m^CiXx+PxDg!c<7gM<4lLk zbQP~kjRd9QmXYqnOuj3hgorJvE0Ke(pOlVXUf|czN#%^;+)Ux?-@tjmf1bkZsxCUc zniL+zZtY^5(PpzVT8p|KT~c!btfj zpi8vW(FH$aZ}Lgc#fp31h5PnV%68p~*()i!w*@A3CXszH6)4=KXqtu*SeIKysKmN} z?nusRj#1TgL$5b6c79mWR-v?rr>u%4JwQfWmHD{3eem0y`>rdJgx)FJ(WtdMu99ga z>CPFKaRv*OE$OEA7@*8c%e#E3TODH=bYG+(ouPcXeZS+-w1qV-Om;uTGML1Xx; z^Ptzmr$Y|9+e>o`y}x^mKC5~r&{~w8*5*+}lwi#5dsAFxNN`X4q|G|L+dWZH)a)Oa zx!`DRWXfCQQG;x*vU7Ib9R^ms5`AT+ZYHW8A`z|=XETJYec1`dG8^(+2b!AE1WCun z6+*c|1{1CI?iXlCC;5nu-7-j7b>)|9MEZpTSV^i1$~#5@wUINkdSBg~VPJ_dfv_f5 z7|QZ~!Zc69YSM1MWpxlK3R<5XcA+ti<~{5D&AZ4Q3bYrYy*To?XL-+Aw)(h-);A62 
zJoXwjJ#c}qXA60@hD{*`kswvA;?>6>M^=95fPuB9oiRnQwXA?iA^F3q4|+@Ev>+dC z060Mqq~*vs;yfTY*%K`kstB1sw|s82r3gGAFO+KMfN(ob=(dHzN(;+R`}m!Ja!Os@ zu0hqFx3w?6YOb1+%Rgt=@FR%@;Jx(_*^_*4XbH4zl1s+jl1?RdgSQc zq_zbtw0#CD{1`b*r3MEt9b!3y!~yqohLpLwO}he1EN^>X$>XnlKH=Um0ca}(M7qc<2@M*aQ@xy}C&e&w-W1W<#4`XBfPDHaVi)yoJKQI{t zk=@TquiCQ3|&Vsw8sqyCW`&)H5qP5+qGUc{;08ToXj7z-8frRUf5 z%UxY#`V@H&5Zh?v=wq+fnAxOCW_na(Py9y)&Zqje!NW4JLWPmBBGxx^@a3loS%a7k zz*n)dnFM1UlsPd|G%PslYD4d~0abgg<(xyPQYSJqTY3z0N9H`LyJ(1OmaZ8ekx_?- z`#r()X)n^hf_ZSQ5ff*oyVvcA=hnDb1`#f@$m5Z_cHbRW#ETs)v^cElgGl7VU8iaA$ZO=$h|ugF9O!Ya z4dLNA29!&!R@&Wzmw--NrSQm11eWoK<_%UoclWvqLCx*xg%!#rpRF!nI_9Xs)w?GT z?YE1Rb<6p%4Z(Y7In3r) zDpPgM?9ai*BXUf}r}l9+@P4b{RB)?5nnLWUjdRVJp27|gZ)SS3B6(5@23U{FOFziS zM^45WwGUqcIW)|5&09fzF6-$j@Y+jTi3d*JMiMQYX*%eQ!m<4O3Mc1As*9pq)>!Ii z^^*f?(k0;}$W(~@0@5$bg};?i1`h&vcFr>lG&~T(mbe-&@B)RaAGuLbTQR(NYCrSE zf=OI(^Zfn^2(jm6{1cTkCP_+B=6;qf5|j3&8*kWvi=}$sw{sKU!#3)bJ?F~dzI+Ek z{DG?tYE%K)w+^L2G;6{W1L*t#U^(NE;-sq3txEN zRk2QO9128VRT^7$Nq62B;Dc2vzQf z%B(v-N5`6bG$}nk_-b@QXPX&qv;b!W1ksCGk&QSar@{^D@p`FSC+D;*ZK&8&Oq9~qd~J7_)}LEyZr zT&`x_aJ6bYw-0uHn!uIQhq+-&M0pTci-Ymnn*(}(Vl=T3T#<@mpDJzvaE zWz7(u_bW}N2h~+3k)woVX3Do)ch0tAy{Y!AO){cN!H1kbb-wjpod7VrOI1RcI$J4S z%6i8cC053gS14^eNzRyhQ!e~rX`9n7RED>CE{AC7+XAjp64DXydBTKSmEJG_c<40% zyJdnJ(Ap`1AH0@l$CiM#B?@byC=tY>OP=egy@X?Eyk8_MChW7z{Sp8&4@%b4nLu3> zfHinxK9WcI7JedYVSDl*uDoAsQ4Bj_!sF{FeK_|BpFXvi;-2Tez)nDfM`Q>3yaaEO z@vtRXbG!H~-snVwf%S2m3B}U79DKaFYce{LN_kU9p_tgAt1Gznb z5A&1r`kbi_QeeP$z>QaaVi`rOE~SuI(HV4H?bS>l{KBK}Nb=X6&=v{$)oU>MRE zmHD=Q^bQwK%Mor(SbP%!#p12wiMy72O!4}rPwgPhi>Qoj*|#r(I{d|0nb&hAJm7(C z+7{n1!9_c5mDu(JahVgiJ0{^C%y(J=nQBM5R4;4p;yC5Kdo(jv!~8{JaK?!?AW`Qt z{)gFlgE#d(R35ej>v_ReMph)T7s}NKw*twBKMpWm96M$`L>Ol0$~}!6hAW(}NuH`O zD9;lJ8|)Nvu8r;If7!ZwCZ-e-Ds5Q2{Ut`L;s}+g4m6}#sDp$Hlv|(?s)$_i7 zr+C<w#rwbZRrvk1%gG#?4H9+U<~vG^)umY6{YeL23- zxT_@;;&9{YYG}m}z~7=je)8c%1<$%Wd%@MR(Q;C}*K&`d5?#b`M6%k|bIpbArwMpr z?0z-zEyL~_?=F(>wjUKaa)T6?F70J%yy1BZGy+`>86syw6mXAF`5oi3> 
zE9dBQW5=oB-8>t1+38pY?dinFDa(wzJR@sYn-m+zo#2#t$aeZnomf29N@>4shLepw z?f!*B(Pv@(;}H?}elYN4n{`9eC>bi;MtEo&|8?_m?Bu$dC;MK79;`nh*-iCcQx)3? zqaK}Cid*3a>^GEs``%~MfJ3K%M7Nec(Ou8oj+ry@V{20+=>vt-u`c4)uu*} z)Wag@Gn;!*_kQK&^N*JT=9C#Dwy#_k7?I;o@@r{1XO9mKM@#knz4sE+!cXK&3WDQI1xoyfj^6u+H*a^>ZPpsV>_^UuQA{FjG)YD01lqE(2{ zcT6q)uypvo%6A4eO1-7CEpKA^=RMyoV1oGu}+(JcB_X%0Us z5dM}w3@hvz_eZ!H*iXAgELEwNK_#GS^xI2sNXh#;%-OO}C$Lp?r zdu9cymz=$Ewgt973Bk`l+7N4F3f1I<1bD0s?tszr`?ImU)B>E;{n^xChAruwWD7UO zB%JiFzEt;$9Ay?v={Wt`|Y(@8_7?7!L~07H9InrI_lq_Rd${1 znOkb~)!v0%tnbvi!TLR(&DkBmwp8>U9$qBe@d}Ua!aQ>u7R$*xmcI~4)Wa3JA|FN1Yn!HekK(Fg1p~rHX4kgM zuM$=OZkMgwVypUDfSXD{B29XLW^ZH$wS)j}AplSSFAxDpB&gi0F~&DzM?82bB;6~yvp2kVCljGJQ?)>a6B-*+Gmzi1GzDSk2{x*!1e zkVq*=Lzplq04SsY1k#^Y08p+4z!73#Eh;Gj5JQ**LlE)D+aZ#lZ;22PfCvbr6xYu( z5dcWnBBoYU3INWHia(71p~RmZIEZnrq!m?||93Pk>{on#`8WI>5)Qg{u*K&L5kF+Ffdu2jDt@o9 z%W=cs99+8pH#HE4Uuq@@uGK8vzgFX`$YTdIloKbxh#tl?qf?d>|C^e6wLe0Q===*` z@;yTJYS;K&B=nm=lVB7jq1{~l4ZoxN3-9s^AN3u-qkE0N- zUg!2YbtGsu5G!MdJxMI48=aB01b`{qGUJUR%K|EI>4L zmQV+XF~kA#=g#kPE10E?i@t-4&36o)wc#_UkptZ|?g!N`fu*@K)R^7e$^mK$wWIUU zy*}~7f`6i>yRquKmVagc^T_W8Tr=?V{R97xeGd@L-@K&z*?0e$>@SCYr~6lbVOHkf zo0sn27;l9C(ZOE>;Ja!6CDxnN{qL-Q9Q!Y^UXQ2$LwJ9l`km9n!W#0=3H?LzZvnP| z+?3(J7toK%`Qy}ofzTdq|`)YoF^!~=+C-c)j45$HORu)#XN{?>jWnf@oIgrjGyy~csRIzu9 z+oM^@3{GAdyU+fUc|Iqc(R0z_K!h;~IE+~cG=_tDL!+vr!ngk`IcCK83Lju8Ecu(e zEK*GLz_BKT2{l+vFok%^^`JjzmxHvBY#@_W>4nxKcEytVnmc5_%H*Atv#BgRS@!hY zeb#K*DkrdcOp-AkY{khLskw>3AQEK}ZD15-l3>luOG&MOb2*CgOG{GYGgIIqT*awH zWtqt+V(cYFnFSz4VnUKKl0Ir2GNLS!q70Hy(?nUsgv2f|ip^k@WDw)9k>H2}#{~m} z1P3FOCC0!YDFX~^22m!Uh}r^1mIwqJYNREb? 
z(*iPQ9TU(Tpe`_w;NXU`(9GGy3^NDjQJ5hMK!$8$1{zYL0W;*O8ia*vh}Z!(F(IHI zCD;lQ^HLIvCGN3SD<$XWl;-BegM&P^SjkEux>_kMGc_kAJ})sh70Ayk&B@VGP|8g$ zNlZyBNkoXC%IB5l7F5QUR2G2rDCH+*r6!js0rf$wf=lFPR-~rrX6BWorl%Gu)y4u< zKwN;$Y9+9%K&Ifd9iar(0YDqfOt9Dw5wF!zK=Y*%ERo^24Otb179!Fxek)PcQD`YN zli;@zNfAlbfg`ghHL)bW2$%+{m2xtZiV}+|LFpkavAColv8b>#6`Ux_Qj3Z+^YcJ5 zMg~U4dWHsihDyLlfy8D!Qi_1aFUV+UNZk iEXl~vQ-B(v0Jm8|BdIhqCq= Date: Mon, 11 Dec 2023 15:20:47 +0000 Subject: [PATCH 12/30] cli --- gtfs_skims/cli.py | 7 +++++-- gtfs_skims/connectors.py | 27 ++++++++++++++++--------- tests/__init__.py | 0 tests/test_cli.py | 25 +++++++++++++++++++++++ tests/test_connectors.py | 21 +++++++++++++++++++ tests/test_data/centroids.csv | 38 +++++++++++++++++------------------ 6 files changed, 87 insertions(+), 31 deletions(-) create mode 100644 tests/__init__.py diff --git a/gtfs_skims/cli.py b/gtfs_skims/cli.py index 328bd9e..110072f 100755 --- a/gtfs_skims/cli.py +++ b/gtfs_skims/cli.py @@ -1,7 +1,7 @@ """Console script for gtfs_skims.""" import click - +from typing import Optional from gtfs_skims.preprocessing import main as main_preprocessing from gtfs_skims.connectors import main as main_connectors @@ -20,8 +20,11 @@ def cli(args=None): @cli.command() @click.argument('config_path') -def run(config_path: str): +@click.option("--output_directory_override", default=None, help="override output directory") +def run(config_path: str, output_directory_override: Optional[str] = None): config = Config.from_yaml(config_path) + if output_directory_override is not None: + config.path_outputs = output_directory_override steps = config.steps if 'preprocessing' in steps: diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index 8d15bfd..b573129 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -315,20 +315,24 @@ def get_egress_connectors(data: GTFSData, config: Config, coords_destinations: n return arr -def main(data: GTFSData, config: Config) -> tuple[TransferConnectors, 
AccessEgressConnectors, AccessEgressConnectors]: +def main(config: Config, data: Optional[GTFSData] = None) -> tuple[TransferConnectors, AccessEgressConnectors, AccessEgressConnectors]: logger = get_logger(os.path.join( config.path_outputs, 'log_connectors.log')) - coords_origins = pd.read_csv(config.path_origins) - coords_destinations = pd.read_csv(config.path_destinations) + + if data is None: + data = GTFSData.from_parquet(config.path_outputs) + coords_origins = pd.read_csv(config.path_origins, index_col=0) + coords_destinations = pd.read_csv(config.path_destinations, index_col=0) # get feasible connections logger.info('Getting transfer connectors...') transfer_connectors = get_transfer_connectors(data, config) logger.info('Getting access connectors...') - access_connectors = get_access_connectors(data, config, coords_origins) + access_connectors = get_access_connectors( + data, config, coords_origins.assign(z=config.start_s).values) logger.info('Getting egress connectors...') egress_connectors = get_egress_connectors( - data, config, coords_destinations) + data, config, coords_destinations.values) # convert to dataframe colnames = ['onode', 'dnode', 'walk', 'wait'] @@ -337,19 +341,22 @@ def main(data: GTFSData, config: Config) -> tuple[TransferConnectors, AccessEgre egress_connectors = pd.DataFrame(egress_connectors, columns=colnames) # offset IDs for endpoints - access_connectors['onode'] += (len(data.stop_times)+1) - egress_connectors['dnode'] += (len(data.stop_times)+len(coords_origins)+2) + access_connectors['onode'] += len(data.stop_times) + egress_connectors['dnode'] += (len(data.stop_times)+len(coords_origins)) # save logger.info(f'Saving connectors to f{config.path_outputs}...') transfer_connectors.to_parquet( - os.path.join(config.path_outputs, 'connectors_transfer.parquet') + os.path.join(config.path_outputs, 'connectors_transfer.parquet.gzip'), + compression='gzip' ) access_connectors.to_parquet( - os.path.join(config.path_outputs, 
'connectors_access.parquet') + os.path.join(config.path_outputs, 'connectors_access.parquet.gzip'), + compression='gzip' ) egress_connectors.to_parquet( - os.path.join(config.path_outputs, 'connectors_egress.parquet') + os.path.join(config.path_outputs, 'connectors_egress.parquet.gzip'), + compression='gzip' ) return transfer_connectors, access_connectors, egress_connectors diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_cli.py b/tests/test_cli.py index f5a59d0..a8540fd 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,9 +1,13 @@ """Tests for `gtfs_skims` CLI.""" +import os +from pathlib import Path from click.testing import CliRunner from gtfs_skims import cli +TEST_DATA_DIR = os.path.join(Path(__file__).parent, 'test_data') + def test_command_line_interface(): """Test the CLI.""" @@ -18,3 +22,24 @@ def test_command_line_interface(): "--help Show this message and exit.\n" in help_result.output ) + + +def test_run_steps_saves_outputs(tmpdir): + runner = CliRunner() + result = runner.invoke( + cli.cli, + ['run', os.path.join(TEST_DATA_DIR, 'config_demo.yaml'), + '--output_directory_override', tmpdir] + ) + + assert result.exit_code == 0 + + for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + assert os.path.exists( + os.path.join(tmpdir, f'{x}.parquet.gzip') + ) + + for x in ['transfer', 'access', 'egress']: + assert os.path.exists( + os.path.join(tmpdir, f'connectors_{x}.parquet.gzip') + ) \ No newline at end of file diff --git a/tests/test_connectors.py b/tests/test_connectors.py index 2922397..7f0c0da 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -163,3 +163,24 @@ def test_convert_distance_3d(): max_tranfer_distance=1 ) assert len(egress.ods) == 1 # radius has been adjusted to 3D space + + +def test_apply_crow_fly_factoring(): + pass + + +def test_access_indices_are_offset(): + pass + + +def test_egress_indices_are_offset(): + pass + + +def 
test_main_saves_outputs(config, gtfs_data_preprocessed, tmpdir): + config.path_outputs = tmpdir + connectors.main(config=config, data=gtfs_data_preprocessed) + for x in ['transfer', 'access', 'egress']: + assert os.path.exists( + os.path.join(tmpdir, f'connectors_{x}.parquet.gzip') + ) \ No newline at end of file diff --git a/tests/test_data/centroids.csv b/tests/test_data/centroids.csv index 5fbb1e0..3e3b3ac 100644 --- a/tests/test_data/centroids.csv +++ b/tests/test_data/centroids.csv @@ -1,19 +1,19 @@ -name,longitude,latitude -E02003587,-1.155884355526687,50.72185610082279 -E02003586,-1.173878456845198,50.72301086140903 -E02003585,-1.224069337360162,50.72613466907799 -E02003584,-1.158508858308219,50.72873880022016 -E02003583,-1.28149160450734,50.75184512743099 -E02003582,-1.31006711916123,50.75340385923516 -E02003581,-1.297514083246226,50.75714214474556 -E02003589,-1.302344772901295,50.70267302231341 -E02003588,-1.284284792950486,50.703256856445805 -E02003597,-1.283759136860344,50.61568691131167 -E02003596,-1.175831901564544,50.63446808149097 -E02003595,-1.180359149209377,50.64417605386147 -E02003594,-1.154986228595743,50.659399194734654 -E02003593,-1.386537290710913,50.68165562633486 -E02003592,-1.526357692381475,50.68396619141156 -E02003591,-1.30109588996732,50.69411141243501 -E02003590,-1.096598389236477,50.69424449742397 -E02003598,-1.210809598549173,50.59781051582961 +name,x,y +E02003587,459682,91699 +E02003586,458411,91813 +E02003585,454864,92122 +E02003584,459488,92462 +E02003583,450783,94940 +E02003582,448766,95094 +E02003581,449648,95518 +E02003589,449364,89458 +E02003588,450639,89535 +E02003597,450770,79798 +E02003596,458382,81965 +E02003595,458050,83041 +E02003594,459825,84754 +E02003593,443438,87068 +E02003592,433558,87253 +E02003591,449461,88507 +E02003590,463905,88678 +E02003598,455952,77862 From 26a3f678ad05539d12c801425feead6213957590 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Mon, 11 Dec 2023 17:15:11 +0000 Subject: [PATCH 
13/30] crows fly distance --- gtfs_skims/connectors.py | 58 +++++++++++++++++++++++----------------- tests/test_connectors.py | 38 ++++++++++++++++++-------- 2 files changed, 61 insertions(+), 35 deletions(-) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index b573129..cc8a94a 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -35,9 +35,9 @@ def query_pairs(coords: np.ndarray, radius: float) -> np.array: class TransferConnectors: """ Manages transfer connectors. """ - def __init__(self, coords: np.ndarray, max_tranfer_distance: float) -> None: + def __init__(self, coords: np.ndarray, max_transfer_distance: float) -> None: self.coords = coords - radius = max_tranfer_distance * (2**0.5) + radius = max_transfer_distance * (2**0.5) self.ods = query_pairs(coords, radius=radius) @cached_property @@ -198,14 +198,14 @@ def __init__( self, coords_origins: np.ndarray, coords_destinations: np.ndarray, - max_tranfer_distance: float + max_transfer_distance: float ) -> None: self.coords_origins = coords_origins self.coords_destinations = coords_destinations - radius = max_tranfer_distance + radius = max_transfer_distance if coords_origins.shape[1] == 3: - radius += max_tranfer_distance * (2**0.5) + radius += max_transfer_distance * (2**0.5) self.ods = query_pairs_od(coords_origins, coords_destinations, radius=radius) @@ -231,23 +231,25 @@ def dcoords(self) -> np.array: def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: time_to_distance = config.walk_speed/3.6 # km/hr to meters - max_tranfer_distance = config.max_transfer_time * time_to_distance + max_transfer_distance = config.max_transfer_time * time_to_distance max_wait_distance = config.max_wait * time_to_distance # get candidate connectors coords = data.stop_times[['x', 'y', 'departure_s']].values - tc = TransferConnectors(coords, max_tranfer_distance) + coords[:, :2] = coords[:, :2] * \ + config.crows_fly_factor # crow's fly transformation + tc = 
TransferConnectors(coords, max_transfer_distance) # apply more narrow filters: # enough time to make transfer - tc.filter_feasible_transfer(max_tranfer_distance) + tc.filter_feasible_transfer(max_transfer_distance) # maximum walk - if config.walk_distance_threshold < max_tranfer_distance: + if config.walk_distance_threshold < max_transfer_distance: tc.filter_max_walk(config.walk_distance_threshold) # maximum wait - if max_wait_distance < max_tranfer_distance: + if max_wait_distance < max_transfer_distance: tc.filter_max_wait(max_wait_distance) # not same route @@ -272,21 +274,26 @@ def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: return arr -def get_access_connectors(data: GTFSData, config: Config, coords_origins: np.ndarray): +def get_access_connectors(data: GTFSData, config: Config, origins: pd.DataFrame): time_to_distance = config.walk_speed/3.6 # km/hr to meters - max_tranfer_distance = config.max_transfer_time * time_to_distance + max_transfer_distance = config.max_transfer_time * time_to_distance max_wait_distance = config.max_wait * time_to_distance # get candidate connectors coords_stops = data.stop_times[['x', 'y', 'departure_s']].values + coords_stops[:, :2] = coords_stops[:, :2] * \ + config.crows_fly_factor # crow's fly transformation + coords_origins = (origins[['x', 'y']] * config.crows_fly_factor).\ + assign(z=config.start_s).values + ac = AccessEgressConnectors( - coords_origins, coords_stops, max_tranfer_distance) + coords_origins, coords_stops, max_transfer_distance) # more narrow filtering - ac.filter_feasible_transfer(max_tranfer_distance) - if config.walk_distance_threshold < max_tranfer_distance: + ac.filter_feasible_transfer(max_transfer_distance) + if config.walk_distance_threshold < max_transfer_distance: ac.filter_max_walk(config.walk_distance_threshold) - if max_wait_distance < max_tranfer_distance: + if max_wait_distance < max_transfer_distance: ac.filter_max_wait(max_wait_distance) arr = np.concatenate([ @@ 
-298,11 +305,16 @@ def get_access_connectors(data: GTFSData, config: Config, coords_origins: np.nda return arr -def get_egress_connectors(data: GTFSData, config: Config, coords_destinations: np.ndarray): +def get_egress_connectors(data: GTFSData, config: Config, destinations: pd.DataFrame): time_to_distance = config.walk_speed/3.6 # km/hr to meters # get candidate connectors coords_stops = data.stop_times[['x', 'y']].values + coords_stops[:, :2] = coords_stops[:, :2] * \ + config.crows_fly_factor # crow's fly transformation + coords_destinations = ( + destinations[['x', 'y']] * config.crows_fly_factor).values + ec = AccessEgressConnectors( coords_stops, coords_destinations, config.walk_distance_threshold) @@ -321,18 +333,16 @@ def main(config: Config, data: Optional[GTFSData] = None) -> tuple[TransferConne if data is None: data = GTFSData.from_parquet(config.path_outputs) - coords_origins = pd.read_csv(config.path_origins, index_col=0) - coords_destinations = pd.read_csv(config.path_destinations, index_col=0) + origins = pd.read_csv(config.path_origins, index_col=0) + destinations = pd.read_csv(config.path_destinations, index_col=0) # get feasible connections logger.info('Getting transfer connectors...') transfer_connectors = get_transfer_connectors(data, config) logger.info('Getting access connectors...') - access_connectors = get_access_connectors( - data, config, coords_origins.assign(z=config.start_s).values) + access_connectors = get_access_connectors(data, config, origins) logger.info('Getting egress connectors...') - egress_connectors = get_egress_connectors( - data, config, coords_destinations.values) + egress_connectors = get_egress_connectors(data, config, destinations) # convert to dataframe colnames = ['onode', 'dnode', 'walk', 'wait'] @@ -342,7 +352,7 @@ def main(config: Config, data: Optional[GTFSData] = None) -> tuple[TransferConne # offset IDs for endpoints access_connectors['onode'] += len(data.stop_times) - egress_connectors['dnode'] += 
(len(data.stop_times)+len(coords_origins)) + egress_connectors['dnode'] += (len(data.stop_times)+len(origins)) # save logger.info(f'Saving connectors to f{config.path_outputs}...') diff --git a/tests/test_connectors.py b/tests/test_connectors.py index 7f0c0da..84abbd3 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -147,7 +147,7 @@ def test_get_od_walk(): egress = connectors.AccessEgressConnectors( np.array([[0, 0], [1, 1]]), np.array([[0.5, 0.5], [2, 1], [2, 2]]), - max_tranfer_distance=1 + max_transfer_distance=1 ) walk = egress.walk expected = np.array([ @@ -157,24 +157,40 @@ def test_get_od_walk(): def test_convert_distance_3d(): - egress = connectors.AccessEgressConnectors( + access = connectors.AccessEgressConnectors( np.array([[0, 0, 0]]), np.array([[1, 1, 1]]), - max_tranfer_distance=1 + max_transfer_distance=1 ) - assert len(egress.ods) == 1 # radius has been adjusted to 3D space + assert len(access.ods) == 1 # radius has been adjusted to 3D space -def test_apply_crow_fly_factoring(): - pass +def test_apply_crow_fly_factoring(gtfs_data_preprocessed, config): + arr = connectors.get_transfer_connectors(gtfs_data_preprocessed, config) + assert len(arr) == 2 + max_walk = arr[:, 3].max() + config.walk_distance_threshold = max_walk + config.crows_fly_factor = 1 + arr = connectors.get_transfer_connectors(gtfs_data_preprocessed, config) + assert len(arr) == 2 -def test_access_indices_are_offset(): - pass + # after adding the crow's fly factor, the destination is further than the max distance + config.crows_fly_factor = 1.05 + arr = connectors.get_transfer_connectors(gtfs_data_preprocessed, config) + assert len(arr) < 2 -def test_egress_indices_are_offset(): - pass +def test_indices_are_offset(config, gtfs_data_preprocessed, tmpdir): + config.path_outputs = tmpdir + transfer_connectors, access_connectors, egress_connectors = \ + connectors.main(config=config, data=gtfs_data_preprocessed) + stop_time_ids = 
list(range(len(gtfs_data_preprocessed.stop_times))) + assert all(np.isin(access_connectors['dnode'], stop_time_ids)) + assert all(np.isin(egress_connectors['onode'], stop_time_ids)) + assert np.isin(access_connectors['onode'], stop_time_ids).sum() == 0 + assert np.isin(egress_connectors['dnode'], stop_time_ids).sum() == 0 + assert access_connectors['onode'].max() < egress_connectors['dnode'].min() def test_main_saves_outputs(config, gtfs_data_preprocessed, tmpdir): @@ -183,4 +199,4 @@ def test_main_saves_outputs(config, gtfs_data_preprocessed, tmpdir): for x in ['transfer', 'access', 'egress']: assert os.path.exists( os.path.join(tmpdir, f'connectors_{x}.parquet.gzip') - ) \ No newline at end of file + ) From c5061620a93ce7d76a83de690183859b1caf1e85 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Mon, 11 Dec 2023 20:37:45 +0000 Subject: [PATCH 14/30] connectors dataclass --- .gitignore | 1 + gtfs_skims/connectors.py | 40 ++++++-------- gtfs_skims/graph.py | 6 +++ gtfs_skims/utils.py | 92 +++++++++++++++++++++++++++----- tests/test_connectors.py | 13 +++-- tests/test_data/config_demo.yaml | 7 ++- tests/test_graph.py | 5 ++ 7 files changed, 120 insertions(+), 44 deletions(-) create mode 100644 tests/test_graph.py diff --git a/.gitignore b/.gitignore index 5f9b874..56c75cc 100755 --- a/.gitignore +++ b/.gitignore @@ -40,4 +40,5 @@ mike-*.yml .ipynb_checkpoints sandbox.py +tests/test_data/outputs/ tests/test_data/outputs/*log \ No newline at end of file diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index cc8a94a..f2c6d9e 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -8,7 +8,7 @@ from scipy.spatial import KDTree import pandas as pd -from gtfs_skims.utils import Config, GTFSData, get_logger +from gtfs_skims.utils import Config, GTFSData, ConnectorsData, get_logger from gtfs_skims.variables import DATA_TYPE @@ -327,7 +327,7 @@ def get_egress_connectors(data: GTFSData, config: Config, destinations: pd.DataF return 
arr -def main(config: Config, data: Optional[GTFSData] = None) -> tuple[TransferConnectors, AccessEgressConnectors, AccessEgressConnectors]: +def main(config: Config, data: Optional[GTFSData] = None) -> ConnectorsData: logger = get_logger(os.path.join( config.path_outputs, 'log_connectors.log')) @@ -338,35 +338,29 @@ def main(config: Config, data: Optional[GTFSData] = None) -> tuple[TransferConne # get feasible connections logger.info('Getting transfer connectors...') - transfer_connectors = get_transfer_connectors(data, config) + connectors_transfer = get_transfer_connectors(data, config) logger.info('Getting access connectors...') - access_connectors = get_access_connectors(data, config, origins) + connectors_access = get_access_connectors(data, config, origins) logger.info('Getting egress connectors...') - egress_connectors = get_egress_connectors(data, config, destinations) + connectors_egress = get_egress_connectors(data, config, destinations) # convert to dataframe colnames = ['onode', 'dnode', 'walk', 'wait'] - transfer_connectors = pd.DataFrame(transfer_connectors, columns=colnames) - access_connectors = pd.DataFrame(access_connectors, columns=colnames) - egress_connectors = pd.DataFrame(egress_connectors, columns=colnames) + connectors_transfer = pd.DataFrame(connectors_transfer, columns=colnames) + connectors_access = pd.DataFrame(connectors_access, columns=colnames) + connectors_egress = pd.DataFrame(connectors_egress, columns=colnames) # offset IDs for endpoints - access_connectors['onode'] += len(data.stop_times) - egress_connectors['dnode'] += (len(data.stop_times)+len(origins)) + connectors_access['onode'] += len(data.stop_times) + connectors_egress['dnode'] += (len(data.stop_times)+len(origins)) # save - logger.info(f'Saving connectors to f{config.path_outputs}...') - transfer_connectors.to_parquet( - os.path.join(config.path_outputs, 'connectors_transfer.parquet.gzip'), - compression='gzip' - ) - access_connectors.to_parquet( - 
os.path.join(config.path_outputs, 'connectors_access.parquet.gzip'), - compression='gzip' - ) - egress_connectors.to_parquet( - os.path.join(config.path_outputs, 'connectors_egress.parquet.gzip'), - compression='gzip' + logger.info(f'Saving connectors to {config.path_outputs}...') + connectors = ConnectorsData( + connectors_transfer=connectors_transfer, + connectors_access=connectors_access, + connectors_egress=connectors_egress, ) + connectors.save(config.path_outputs) - return transfer_connectors, access_connectors, egress_connectors + return connectors diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index e69de29..bec47c2 100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -0,0 +1,6 @@ +import os + +from graph_tool import Graph +from graph_tool.topology import shortest_distance + +from gtfs_skims.utils import Config, GTFSData, get_logger \ No newline at end of file diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py index 3482a17..0f796c1 100644 --- a/gtfs_skims/utils.py +++ b/gtfs_skims/utils.py @@ -4,7 +4,7 @@ import logging import os from pathlib import Path -from typing import Optional +from typing import Optional, Self import yaml from zipfile import ZipFile @@ -118,6 +118,9 @@ class Config: start_s: int walk_distance_threshold: int walk_speed: float + weight_walk: float + weight_wait: float + penalty_interchange: float steps: list @classmethod @@ -146,16 +149,65 @@ def __repr__(self) -> str: return s -@dataclass -class GTFSData: - calendar: pd.DataFrame - routes: pd.DataFrame - stops: pd.DataFrame - stop_times: pd.DataFrame - trips: pd.DataFrame +# @dataclass +# class GTFSData: +# calendar: pd.DataFrame +# routes: pd.DataFrame +# stops: pd.DataFrame +# stop_times: pd.DataFrame +# trips: pd.DataFrame + +# @classmethod +# def from_gtfs(cls, path_gtfs: str) -> GTFSData: +# """Load GTFS tables from a standard zipped GTFS file. + +# Args: +# path_gtfs (str): Path to a zipped GTFS dataset. + +# Returns: +# GTFSData: GTFS data object. 
+# """ +# data = {} +# with ZipFile(path_gtfs, 'r') as zf: +# for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: +# with zf.open(f'{name}.txt') as f: +# data[name] = pd.read_csv(f, low_memory=False) +# return cls(**data) + +# @classmethod +# def from_parquet(cls, path: str) -> GTFSData: +# """Construct class from pre-processed GTFS tables in Parquet format. + +# Args: +# path (str): Path to tables. + +# Returns: +# GTFSData: GTFS data object. +# """ +# data = {} +# for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: +# data[name] = pd.read_parquet( +# os.path.join(path, f'{name}.parquet.gzip')) +# return cls(**data) + +# def save(self, path_outputs: str) -> None: +# """Export all tables in zipped parquet format. + +# Args: +# path_outputs (str): Directory to save outputs. +# """ +# if not os.path.exists(path_outputs): +# os.makedirs(path_outputs) + +# for k, v in self.__dict__.items(): +# v.to_parquet(os.path.join( +# path_outputs, f'{k}.parquet.gzip'), compression='gzip') + +@dataclass +class Data: @classmethod - def from_gtfs(cls, path_gtfs: str) -> GTFSData: + def from_gtfs(cls, path_gtfs: str) -> Self: """Load GTFS tables from a standard zipped GTFS file. Args: @@ -166,13 +218,13 @@ def from_gtfs(cls, path_gtfs: str) -> GTFSData: """ data = {} with ZipFile(path_gtfs, 'r') as zf: - for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + for name in cls.__annotations__.keys(): with zf.open(f'{name}.txt') as f: data[name] = pd.read_csv(f, low_memory=False) return cls(**data) @classmethod - def from_parquet(cls, path: str) -> GTFSData: + def from_parquet(cls, path: str) -> Self: """Construct class from pre-processed GTFS tables in Parquet format. Args: @@ -182,7 +234,7 @@ def from_parquet(cls, path: str) -> GTFSData: GTFSData: GTFS data object. 
""" data = {} - for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + for name in cls.__annotations__.keys(): data[name] = pd.read_parquet( os.path.join(path, f'{name}.parquet.gzip')) return cls(**data) @@ -199,3 +251,19 @@ def save(self, path_outputs: str) -> None: for k, v in self.__dict__.items(): v.to_parquet(os.path.join( path_outputs, f'{k}.parquet.gzip'), compression='gzip') + + +@dataclass +class GTFSData(Data): + calendar: pd.DataFrame + routes: pd.DataFrame + stops: pd.DataFrame + stop_times: pd.DataFrame + trips: pd.DataFrame + + +@dataclass +class ConnectorsData(Data): + connectors_transfer: pd.DataFrame + connectors_access: pd.DataFrame + connectors_egress: pd.DataFrame diff --git a/tests/test_connectors.py b/tests/test_connectors.py index 84abbd3..f47fe9a 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -183,14 +183,13 @@ def test_apply_crow_fly_factoring(gtfs_data_preprocessed, config): def test_indices_are_offset(config, gtfs_data_preprocessed, tmpdir): config.path_outputs = tmpdir - transfer_connectors, access_connectors, egress_connectors = \ - connectors.main(config=config, data=gtfs_data_preprocessed) + conn = connectors.main(config=config, data=gtfs_data_preprocessed) stop_time_ids = list(range(len(gtfs_data_preprocessed.stop_times))) - assert all(np.isin(access_connectors['dnode'], stop_time_ids)) - assert all(np.isin(egress_connectors['onode'], stop_time_ids)) - assert np.isin(access_connectors['onode'], stop_time_ids).sum() == 0 - assert np.isin(egress_connectors['dnode'], stop_time_ids).sum() == 0 - assert access_connectors['onode'].max() < egress_connectors['dnode'].min() + assert all(np.isin(conn.connectors_access['dnode'], stop_time_ids)) + assert all(np.isin(conn.connectors_egress['onode'], stop_time_ids)) + assert np.isin(conn.connectors_access['onode'], stop_time_ids).sum() == 0 + assert np.isin(conn.connectors_egress['dnode'], stop_time_ids).sum() == 0 + assert 
conn.connectors_access['onode'].max() < conn.connectors_egress['dnode'].min() def test_main_saves_outputs(config, gtfs_data_preprocessed, tmpdir): diff --git a/tests/test_data/config_demo.yaml b/tests/test_data/config_demo.yaml index 4196b30..fb049f7 100644 --- a/tests/test_data/config_demo.yaml +++ b/tests/test_data/config_demo.yaml @@ -11,11 +11,14 @@ settings: walk_distance_threshold : 2000 # m | Max walk distance in a leg walk_speed : 4.5 # kph | Walking speed crows_fly_factor : 1.3 # Conversion factor from euclidean to routed distances - max_transfer_time : 1800 # Max combined time of walking and waiting (sec) + max_transfer_time : 1800 # Max combined time of walking and waiting (sec) of a transfer max_wait : 1800 # sec | Max wait time at a stop / leg bounding_box : null epsg_centroids: 27700 # coordinate system of the centroids file. Needs to be Cartesian and in meters. - + weight_walk: 2 # value of walk time, ratio to in-vehicle time + weight_wait: 2 # value of wait time, ratio to in-vehicle time + penalty_interchange: 300 # seconds added to generalised cost for each interchange + steps: - preprocessing - connectors diff --git a/tests/test_graph.py b/tests/test_graph.py new file mode 100644 index 0000000..e1fe1b3 --- /dev/null +++ b/tests/test_graph.py @@ -0,0 +1,5 @@ +import os + +import pytest + +from gtfs_skims import graph \ No newline at end of file From cd8e47e6f5092efb5b91e2be2f7978227174ab1b Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Mon, 11 Dec 2023 21:40:24 +0000 Subject: [PATCH 15/30] edges --- gtfs_skims/connectors.py | 46 ++++++++++++++++- gtfs_skims/graph.py | 47 +++++++++++++++++- tests/conftest.py | 7 ++- .../test_data/outputs/stop_times.parquet.gzip | Bin 8296 -> 8287 bytes tests/test_data/outputs/stops.parquet.gzip | Bin 8421 -> 8417 bytes tests/test_graph.py | 30 ++++++++++- 6 files changed, 125 insertions(+), 5 deletions(-) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index f2c6d9e..a3536fd 100644 
--- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -230,6 +230,15 @@ def dcoords(self) -> np.array: def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: + """Get all transfer connectors (between stops). + + Args: + data (GTFSData): GTFS data object. + config (Config): Config object. + + Returns: + np.ndarray: [origin id, destination id, walk time, wait time] + """ time_to_distance = config.walk_speed/3.6 # km/hr to meters max_transfer_distance = config.max_transfer_time * time_to_distance max_wait_distance = config.max_wait * time_to_distance @@ -274,7 +283,18 @@ def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: return arr -def get_access_connectors(data: GTFSData, config: Config, origins: pd.DataFrame): +def get_access_connectors(data: GTFSData, config: Config, origins: pd.DataFrame) -> np.ndarray: + """Get all access connectors (between origins and stops). + + Args: + data (GTFSData): GTFS data object. + config (Config): Config object. + destinations (pd.DataFrame): Origin coordinates dataframe. + Must include 'x' and 'y' columns, providing the cartesian coordinates of the trip start points. + + Returns: + np.ndarray: [origin id, destination id, walk time, wait time] + """ time_to_distance = config.walk_speed/3.6 # km/hr to meters max_transfer_distance = config.max_transfer_time * time_to_distance max_wait_distance = config.max_wait * time_to_distance @@ -305,7 +325,18 @@ def get_access_connectors(data: GTFSData, config: Config, origins: pd.DataFrame) return arr -def get_egress_connectors(data: GTFSData, config: Config, destinations: pd.DataFrame): +def get_egress_connectors(data: GTFSData, config: Config, destinations: pd.DataFrame) -> np.ndarray: + """Get all egress connectors (between stops and destinations). + + Args: + data (GTFSData): GTFS data object. + config (Config): Config object. + destinations (pd.DataFrame): Destination coordinates dataframe. 
+ Must include 'x' and 'y' columns, providing the cartesian coordinates of the trip ends. + + Returns: + np.ndarray: [origin id, destination id, walk time, wait time] + """ time_to_distance = config.walk_speed/3.6 # km/hr to meters # get candidate connectors @@ -328,6 +359,17 @@ def get_egress_connectors(data: GTFSData, config: Config, destinations: pd.DataF def main(config: Config, data: Optional[GTFSData] = None) -> ConnectorsData: + """Get feasible connections (transfers, access, egress). + + Args: + config (Config): Config object. + data (Optional[GTFSData], optional): GTFS data object. + If not provided, reads the stored parquet files from the outputs directory. + Defaults to None. + + Returns: + ConnectorsData: Connectors object, holding the three output tables. + """ logger = get_logger(os.path.join( config.path_outputs, 'log_connectors.log')) diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index bec47c2..2d13ed0 100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -2,5 +2,50 @@ from graph_tool import Graph from graph_tool.topology import shortest_distance +import numpy as np +import pandas as pd -from gtfs_skims.utils import Config, GTFSData, get_logger \ No newline at end of file +from gtfs_skims.utils import Config, GTFSData, ConnectorsData, get_logger + + +def get_ivt_edges(stop_times: pd.DataFrame) -> pd.DataFrame: + """Get in-vehicle times between stops. + + Args: + stop_times (pd.DataFrame): The stoptimes GTFS table. 
+ + Returns: + np.ndarray: [origin id, destination id, in-vehicle time] + """ + edges_ivt = pd.Series(range(len(stop_times))) + trip_id = stop_times.reset_index()['trip_id'] + departures = stop_times.reset_index()['departure_s'] + + edges_ivt = pd.concat([ + edges_ivt, + edges_ivt.groupby(trip_id).shift(-1), + departures.groupby(trip_id).shift(-1) - departures, + ], axis=1).dropna().map(int) + edges_ivt.columns = ['onode', 'dnode', 'ivt'] + + return edges_ivt + + +def get_all_edges(gtfs_data: GTFSData, connectors_data: ConnectorsData) -> pd.DataFrame: + """Get all edges for the accessibility graph. + + Args: + gtfs_data (GTFSData): GTFS data object. + connectors_data (ConnectorsData): Connectords data object. + + Returns: + pd.DataFrame: ['onode', 'dnode', 'ivt', 'walk', 'wait', 'transfer'] + """ + edges = pd.concat([ + get_ivt_edges(gtfs_data.stop_times), + connectors_data.connectors_transfer.assign(transfer=1), + connectors_data.connectors_access, + connectors_data.connectors_egress, + ], axis=0).fillna(0).map(int) + + return edges diff --git a/tests/conftest.py b/tests/conftest.py index bc546af..824ee6e 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,7 +12,7 @@ def test_content(response): import pytest -from gtfs_skims.utils import Config, GTFSData +from gtfs_skims.utils import Config, GTFSData, ConnectorsData TEST_DATA_DIR = os.path.join(Path(__file__).parent, 'test_data') @@ -40,3 +40,8 @@ def gtfs_data(): @pytest.fixture def gtfs_data_preprocessed(): return GTFSData.from_parquet(os.path.join(TEST_DATA_DIR, 'outputs')) + + +@pytest.fixture +def connectors_data(): + return ConnectorsData.from_parquet(os.path.join(TEST_DATA_DIR, 'outputs')) diff --git a/tests/test_data/outputs/stop_times.parquet.gzip b/tests/test_data/outputs/stop_times.parquet.gzip index 29ecd67f7f10a602c6edc3eeb7076283075ac3df..2dd4231027e3f3d89f21e253de2dc415a3668ae0 100644 GIT binary patch delta 1166 zcmV;91abT5K;J;H3K9as2$Mkx7k|`CNK{cA$MI`QMYXUX2!aY~DYP3x5Zg!uL5mhm 
z8!2$I5?V;KiIhkMQ4xIL#y8@CqA=qqzD7rUi{oN^KL$}3J{S~TIpg?1e_t0@+z5gj z@8-jK|Ih!Nb0ZDI@I*!A?%y8wqF+4bPvAJ`?UPBEj79Ww zFfNbzeE68SVlV}hiQA6v@P7^Mop_ghW|9XBKEQa|(-^;SX8s>@=KQ73oAYbGq5JQP zkdM&IyRf5^_hH5VSWJ5&=1{-E`Q~5;=bC}vxu3b*!@s*SKsg)laW88_=KI`?vCOlH zi($Ep0gPka`^58*I1}+7{$U>_=)!jLQcu3F^LCxLi{#B#W!CQE9e=m*Uu%<|4*9|@ z+q!7)rrjg^d*phb>>ZF@gYxCDtQetwl>duYc2gdv-8Cd<8Sfr2&%eTYQ^a89VzJYi4+#JO6#x~UBo(R%lR*g= zf7DGV;pAaFji)>gE|hWdREi4` zkvxW+Oo@wJD1?&BL`@_jPfk6s@c~*M@Apj-ESWA z@EtB2#1wK5y3sp-CrF&dejLC##+|a>eEF7upvaSdJf&gB;( zo`;H4;j9xOE|7i6_2h9(3lm3~@5UnPm1GP1lmsjHf2#gI_dP5Q@eR5v#9p$gQtU%@ z73U>88DEEq_zzRd#gAp;SJY9zAy4CBk+@O7`_C5}$;TMXI6wSQC>nX*!aO(ne^bB0 z9_DF^#Z4S3VV_d5m2++2E)H`)Eqxlg*T3%$Wq&<(aV(ejnj_ZWmu&GeQw+=!+c1E7 zwvG3KNod1>4AGp%x$qiK(#2TDx3m5t`#Z6V`dbV^GbUq4mU7-_?qkKD)DsxD&OL18 zLY^-(-a~(O>@R(L>D%M`rn4wre=&)AR7Z(1kzyy`)fv90arlRNG~ZD*dcUv!Q73v5X02YWyJQxV80tWy90000O2(gpE85Sz?P#hHm85IB-7y)?|1Qx&q g7RDqN(g+y<7V=O$7zht}0RR91000;Wgt8iF22=$+g#Z8m delta 1180 zcmV;N1Y`T(K$EJv4f>_wp9vi2yvJlFX5l~j zIT%;Wd;z>mT*;V?S;TF}e;4?Sb}wFMp9SQ>g10b@_FTpXPt5;m&YZuTpgF(xc-{X% zoV#Q(HsU?KHuoNpm^aIX3Ik^5Q1J^UV#VamVp2KTad+kBthn8G~k zIM4lD#4x6^?oHykO`I9{1HZA4Qfx*Kd8sE~mwCI)+gb8vt1@eMfAfx8`LB6ok5@kG zlI`8J_t4%eeSLD(C;JCv*PwhnEGtH+_w#=lmEDwwXm1{pGmP&XFweildXvOqC5{i6 zM023_YCtOj8zd?c9Unb3YSI`%VpgVOHd*$VQfX7rwCZ8BXcHoe z_*PI{IiR>vHwr51LN|koPsH~V6#0E;@fSCO;Ktc}IOpE`zvrGi+!Kajw4DrDu!k7C zzYfgBGuRs|UBqP=z~K7vIN6RBSQX4Oe{Y5FUBZ6sWW1L7Md-pM#-q{0I$w+&K>wb) z&Lzn8MA>{#-nlQkh(n3L6E|Y!U0Kii0KO$(NSw+(-gxu=ht=PsK1XW4Jca&G)Jy!Z zK$fGWP_7aCnEwsaF&Z;I%8Ook1tZBD*g}3jPY%82{&VF|;w4OA-dVixM#eM#e}{De z3?sjVrL42Qlfzi|UM_x+f2nH-r?8guMbM|3J^j0uZ|?7eM>f3VUUTFZy!=9Lx@A}xNKTF`u*BR|bR z)~Q1wSFuFA#`x*~=07Z6EkBtX_kSXl6RzR7`;ugvdsD_!yeMAIdi|fAV?BGF<7po9 z9va;H6W_u)?Xz6r9o))fzn=~knddX#EQIek$726oX+5j0vz#Y++&(U`ojvN(qFxo( zBY(>^_w0&uzZ_0*uGzZ8OXX*{=Z3m4+3q|$-S6&js5jiyA9fFj4~h?Y?|;ML#|Pp4 zk#OKqxOp_ZI;Q`Ldo~^pYLAMyKMtpi?;pvYf7bsdt)t934l-Z7bT~V|jW^{L7{AW` z?%D5F`_JnNm#IsWI*qll)qA`fE^P^)s_OxMQ1`FSWkLUX`A3XTo3G3K|GwYTs^CD^ 
z1pojQ02Qtm73v2p74iZV0F#Cn5`SH2&2bn9@G~w}s9mgaC98S6n6-q+Hk)}_I&IAm zyE2Z=hAkU5ZYakUrW7|s6D{)+a%ClM5%!{OXR;X;dEcf8&0IX&Td?|(JQkG$pkO3Z19i#gsA-fee&d&A>g&=&sQ6PCAz7g(m= zmU0f<&J@dbhshT2!AXAG6>ipl(fwa|ewtU*XR?M#?%<`?+m&Nd&1cNKN*lu2Tg-{~`GH9;V+pS|IsZoI%U5+_1q(KY^&7&={%EVm{--9cP~YS~ zDyYr*M-+?7!)Lw?b0000O2!ylh8~X&#zdcg` delta 1127 zcmV-t1ep8bLFGZPWfcO72$K*C7L$Dy3V%XUEi4Fvpn_To?IZ-TjYJT%XpwECAhQx$ zNVJKRD1%T5vS?+}gD&(Sd{vsS3^5H|SX!16<)Q~tRI97EGU|LSuK(s^=A4;-=FCh^ zB9Zu!omKy9S>o?knhuXGgu9u;)O=W@{hc+&_po*`EKKD7b4I&?<*ec$H_5-HJ%9T< z9OZqEbKd{%GQ|S!Vl@|kh99&$%u~P%`pa3#G@Im|*El1s#BwO8^Ql91|@iNxw|Lh#=_{ce) z;bHHg-n~DW3g@)Xaiw=~b0PEnbgPm000#L6|NT*><26r@{u+;hM8 zedlr~ClZO#NjanCIf?P#19oy6hswex`64rXTM^#pWnP`A>-+9GbNP%9t$+9HKgA}F zSubISed*HhIR~xJs0x2ohj&(nD^`V%nTXZ-02kdkynF`+Z-5IbG)>j&y{#wmHA{@DS&34gaLV;{Fl0YSyud=i0LO75JVcm#8=Dd*M4$ za+mcX{Uhe{#=HvaomYc7ae%K`$EBRdzNGuO$@%hWZCK8{HDUeg@PE8}bel71sp-02Klm zoJa<9V`yb7zhw~0RR91000;Wg|p@x`ve87M|S`K diff --git a/tests/test_graph.py b/tests/test_graph.py index e1fe1b3..a4eb8fd 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -1,5 +1,33 @@ import os +import numpy as np +import pandas as pd import pytest -from gtfs_skims import graph \ No newline at end of file +from gtfs_skims import graph + + +def test_get_ivt_times(): + stop_times = pd.DataFrame({ + 'trip_id': [0, 1, 0, 1], + 'departure_s': [100, 105, 120, 150] + }) + ivt_edges = graph.get_ivt_edges(stop_times) + expected = np.array([ + [0, 2, 20], + [1, 3, 45] + ]) + np.testing.assert_equal(ivt_edges.values, expected) + + +def test_get_all_edges(gtfs_data_preprocessed, connectors_data): + edges = graph.get_all_edges(gtfs_data_preprocessed, connectors_data) + + len_expected = len(gtfs_data_preprocessed.stop_times) - gtfs_data_preprocessed.stop_times['trip_id'].nunique() + len_expected += len(connectors_data.connectors_transfer) + len_expected += len(connectors_data.connectors_access) + len_expected += len(connectors_data.connectors_egress) + assert len(edges) == len_expected + 
+ assert list(edges.columns) == [ + 'onode', 'dnode', 'ivt', 'walk', 'wait', 'transfer'] From 18410ab8f89a008d8d02f28cffb0de05c736ec72 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Tue, 12 Dec 2023 14:14:33 +0000 Subject: [PATCH 16/30] shortest distances --- .gitignore | 2 +- gtfs_skims/graph.py | 90 +++++++++++++++++- .../outputs/connectors_access.parquet.gzip | Bin 0 -> 1600 bytes .../outputs/connectors_egress.parquet.gzip | Bin 0 -> 2385 bytes .../outputs/connectors_transfer.parquet.gzip | Bin 0 -> 1362 bytes tests/test_graph.py | 68 ++++++++++++- 6 files changed, 157 insertions(+), 3 deletions(-) create mode 100644 tests/test_data/outputs/connectors_access.parquet.gzip create mode 100644 tests/test_data/outputs/connectors_egress.parquet.gzip create mode 100644 tests/test_data/outputs/connectors_transfer.parquet.gzip diff --git a/.gitignore b/.gitignore index 56c75cc..0cb40c9 100755 --- a/.gitignore +++ b/.gitignore @@ -40,5 +40,5 @@ mike-*.yml .ipynb_checkpoints sandbox.py -tests/test_data/outputs/ +# tests/test_data/outputs/ tests/test_data/outputs/*log \ No newline at end of file diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index 2d13ed0..3db94b2 100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -1,4 +1,6 @@ -import os +from functools import partial +import multiprocessing +from typing import Optional from graph_tool import Graph from graph_tool.topology import shortest_distance @@ -49,3 +51,89 @@ def get_all_edges(gtfs_data: GTFSData, connectors_data: ConnectorsData) -> pd.Da ], axis=0).fillna(0).map(int) return edges + + +def add_gc(edges: pd.DataFrame, config: Config) -> pd.DataFrame: + """Calculate generalised time and add it as a column to the 'edges' table. + + Args: + edges (pd.DataFrame): Edges dataframe. Should include these columns: + ['ivt', 'walk', 'wait', 'transfer'] + config (Config): Config object. + + Returns: + pd.DataFrame: Edges dataframe, with the generalised time ("gc") column included. 
+ """ + edges['gc'] = edges['ivt'] +\ + edges['walk'] * config.weight_walk +\ + edges['wait'] * config.weight_wait +\ + edges['transfer'] * config.penalty_interchange + + # adding unweighted time as well + edges['time'] = edges[['ivt', 'walk', 'wait']].sum(1) + + return edges['gc'] + + +def build_graph( + edges: pd.DataFrame, + vars=['ivt', 'walk', 'wait', 'time', 'gc'] +) -> Graph: + """Build a network graph from the edges table. + + Args: + edges (pd.DataFrame): Edges table. Should include the 'gc' and 'time' columns from the 'add_gc' method. + vars (list): list of variables to include in the graph as edge properties. + + Returns: + Graph: Connected GTFS graph + """ + eprops = [(x, 'int') for x in vars] + g = Graph( + edges[['onode', 'dnode']+vars].values, + hashed=False, + eprops=eprops + ) + return g + + +def get_shortest_distances_single( + graph: Graph, + onode: int, + dnodes: list[int], + max_dist: Optional[float] = None, + attribute: str = 'gc' +) -> np.ndarray: + d = shortest_distance(graph, onode, dnodes, + weights=graph.edge_properties[attribute], dense=False, + max_dist=max_dist, directed=True) + d = np.concatenate([np.array([onode]), d]) + + return d + + +def get_shortest_distances( + graph: Graph, + onodes: list[int], + dnodes: list[int], + max_dist: Optional[float] = None, + attribute: str = 'gc' +) -> pd.DataFrame: + n_cpus = multiprocessing.cpu_count() - 1 + dist_wrapper = partial(get_shortest_distances_single, graph, dnodes=dnodes, + max_dist=max_dist, attribute=attribute) + with multiprocessing.Pool(n_cpus) as pool_obj: + dists = pool_obj.map(dist_wrapper, onodes) + + dists = np.array(dists) + dists = dists[dists[:, 0].argsort()] # sort by source node + + # convert to dataframe and reindex + dists = pd.DataFrame(dists[:, 1:], index=dists[:, 0], columns=dnodes) + dists = dists.loc[onodes] + + return dists + + +def main(): + pass diff --git a/tests/test_data/outputs/connectors_access.parquet.gzip 
b/tests/test_data/outputs/connectors_access.parquet.gzip new file mode 100644 index 0000000000000000000000000000000000000000..73e66e9f735a1dcad52c7bbc804b77540dd99f04 GIT binary patch literal 1600 zcmWG=3^EjD5S_v#nxG?U2V}8{axgH+cLNC!_`mk>MqUO67T1H{eVyJU{@)lP)p<@e z-Qgr_waxj*8U4RRn7V+*dLLX=O=8Syi#Qsild9nU($f7xa6&cq^}L0>bb&*bO&5ZOG>kAkgygqkwn; zo8aCs&yuLMULiu;3wc|;Uzqr-2(fOJ{$OtQbJE#2eG{ixUYlGMq7l2&`JIRKb>X!W zvqbJlt4RE`JfZimjOC4>&TpS|$5VC6YW%gC3(OxL^!l&*|2gy3oip|#9P$8%L(Tvl z;vnGmuwJ~?O+V06rs7e|8|(xo$rul| z;^d6f+(ckRi86^gh%!mA=I7<7q^e56`6;M;mh!}$Y=ktBn^^+nirGoZNcyO8$cQpY ziZVz-jS*!LvlH9FB-X+x$sopIBf+A~$iN`Mq5vet7#JjFfN>492N>yUJ&bUjU<<)I zcQJ|W0_p5w1ZtfMR3^bv4kXdE9%F)O1-lKVa}pC!=P@RrP7zj+&h3~wZ?GWg%q)TF zYy;`M!2;Cj57r5CAgY~W&p5^GfbN%ID@e>sNi3G=WUE$6&d({$&5O^>OG&LrEmpEp zh^|&j%S_EliO);SO$GAvN^^2_6qIsPOA=EOOA-+xsPcKGxdoN+C6xsrJxcjWS*gh- zNVQ(96~;zb8~~N5 z)loq6rxG{@@mmI0La}9_^oZXsm6g1HxTdQZNXACk8=&F*; x5};#{ZH=vEkkCVNvu;6UNk)F20@MHnxT6&`l1ejkQWOj{89G@Q7y^KG2LP8d%6aYv6ZtF>Ol|&6#)}MERl2*5yXNsop%4&`MrJbd*Aol@BMZ*$YYHQ z3P9CkP*S)fst^vX;V1+ESc##V?$WNR3+5a%0K8vbep7$7p4n8~*%{R&V>VSWn*xdj z#gv=AozJUzJ?CQs-)mB*Sin5iY#`@)=2>{Ip5}&3&0gadJ8cCV8%?@Inj-bb=9GNki!^uo##^c83EOl67qWf@))Iipi=Uz z+UKv8%Q~El=(!UmcZpu9kDM_C8+zUW$y1_t<)ir+rz!L^{*tO=yYr8wb`Y0GJaUv) zVx{*NOCK0IR^T0v&bU_VNO}y43NqGS3$1?`O0jzbrZjGynSt)^dUJcpv5fUF@{%vg48Y>$9uYwm?=Z?>Nb)ZWRrL`&BU9s zX}pGJQlHEy);_Hp9oTL5Yd6+cxGPgAY7nN}6z=JJe>cH3M;fWPpWyaf8lx!JxZaj> z6c03RT50lO<{JNtzN8L`b=E`Ax!iQ|;?}c($llXsWzc?NhM#DBr0J*kma?J3dir>B z!9qV|h+(ta(k0vA05IsB!RXF)ru$B~gG0i?3}b^lkEZN)QXPB(x_fP3(Uuz&VpU5q z2Hu5)8l)ml-g7N!PkWkuY0j*!k4vk3f3*Ky#ldZ{@j$B%$Bve|yKU*vm`&I0DMSc05zXTZ& zd_=%fuNW{8T&jC94qL2(trIWXe7Y>nb+c?iRHoIFoD+@vCcS^$VXv)hj*G_z_hZ8~ zW|8|Z=g-?`qF;&jd|j9vN9dh++v(O+W7<_k=5LdGX3frv?^w_pTCnMKyX52J=#v`nUXRyPsFnSV z^ZN<1w#l|G({Dx;sV8bT6pAa=3*S~%l+Dx}-50q;tawX3KDDok=2R3R|7iD)ySmgS ztMtt=;i2m)%Ca2JX}5&5frS>2DiX_HZwlP@A>8qGQmZIZ25VE5gb?rc-qMEtDou=Q3U%e# zxb>7wzpnE3>OPZ}MXcO*>nG1VUv((TS0#6Suy~VSyHtL|{U>s1*7NLxT?uJr)cFku 
zo=KI8<4H-{q@)<-t5(YzP6;V-a*t)b$Aee!x{_YodVkFTa5V?}WO#;)&?Hs6*s54J;(HpFZs!+Be_f-D;J#X=dP+);+;aU3>>4$iO` zis>U`SPYH1Z8!~u3_cW^7s4@EjQ>mo7G;P*0hqx)C>WG5AHbL^;TXUiu^0`TY5)Le znC`=94geS|bO0eHD4MB(4?*>m!%zbVs)0jTcRiK}-M_+i57Fx74MF#ghoId$S~uvh zqhV41q*a@Irqyrd6I6pSPk@kW69}o3I+|c}RV1`GXbWg~G>uKC@zDF>@%Swqrhvt! zGT3xb2=ed?abfZJ2nNWcQ`s~Y2*qpxlj(@Vvp_zLPUF*tpN!121+3^;DnB+_hrx5g zqrfeEJcRp1HI!g6gdlwmgUtscK`uUV6IAg@!D!TYy|ub7W3msIj1(Y%C`iOn1Pmq; z9dTdn6R$V%+hju}|CvlTl;7t1T=73pG4xK1{SI|ULMP^5Qt8d(f;2vd3wa)oXEMUM zG;XZU{|Fk7A5G(K7l8W4J3uav!C~t%L}wz2;Nnbh!9y0{^@nb>TfdTeoO!W literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/connectors_transfer.parquet.gzip b/tests/test_data/outputs/connectors_transfer.parquet.gzip new file mode 100644 index 0000000000000000000000000000000000000000..0d024133443bd21d0409710b9277e4dbcc9489de GIT binary patch literal 1362 zcmb`H&ui2`6vt;{3=1k^Au*8!!%&Nb?Xo{^rBo{(6h(>`4_Y>p&1^e1nZ#tGUDg#Z zf}r0013Y-~B0YHUKM?va*n^(Dde@iPG`p$og6JNGnY{OXpKs>9?E2c>mX7qge!Z@% zu+j7?LKk1blKp=5T*q<9H{v<_nrkBT<^0EY1_Y#Jb%x~g+4(h7Lvydstrq#S5I_5_ z^PYOoPoTwnS3VSyYcmi}&&LvbkfxnZLb2lTTt6nM}OjWl_?z)xy}RJt^&F_ zsrv!cGSB#=7%KQSwTN(!B<4$PhmcJlpqT8$@pqVg1DTn>M9JpGl!!2f zo)59l+?d3Jtp9zIh=|0GQYs5Srg6eTo<-WtcBj#5Hd+{3faTbAOB1jS&$7gr@J%XR zmF6%JBo+{Ff;tg2f^AD~!))7@6P3^0G_}!cbh2V#N-zOlrlA`nWHzQOFGT~f6>>W- XU>Bb4%Wlei&u%WFXDUML@NfDXkdt5% literal 0 HcmV?d00001 diff --git a/tests/test_graph.py b/tests/test_graph.py index a4eb8fd..373e77f 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -1,12 +1,43 @@ import os +from graph_tool import Graph import numpy as np import pandas as pd import pytest +from unittest.mock import Mock from gtfs_skims import graph +@pytest.fixture() +def mock_config(mocker): + mock = Mock() + mock.weight_wait = 3 + mock.weight_walk = 2 + mock.penalty_interchange = 600 + return mock + + +@pytest.fixture() +def small_graph() -> Graph: + edges = pd.DataFrame({ + 'onode': [0, 0, 1, 2], + 'dnode': [1, 2, 3, 3], + 'gc': [10, 20, 15, 4] + }) + return 
graph.build_graph(edges, vars=['gc']) + + +@pytest.fixture() +def small_graph_birectional() -> Graph: + edges = pd.DataFrame({ + 'onode': [0, 0, 1, 2, 1, 2, 3, 3], + 'dnode': [1, 2, 3, 3, 0, 0, 1, 2], + 'gc': [10, 20, 15, 4, 10, 20, 15, 4] + }) + return graph.build_graph(edges, vars=['gc']) + + def test_get_ivt_times(): stop_times = pd.DataFrame({ 'trip_id': [0, 1, 0, 1], @@ -23,11 +54,46 @@ def test_get_ivt_times(): def test_get_all_edges(gtfs_data_preprocessed, connectors_data): edges = graph.get_all_edges(gtfs_data_preprocessed, connectors_data) - len_expected = len(gtfs_data_preprocessed.stop_times) - gtfs_data_preprocessed.stop_times['trip_id'].nunique() + # all connections are included + len_expected = len(gtfs_data_preprocessed.stop_times) - \ + gtfs_data_preprocessed.stop_times['trip_id'].nunique() len_expected += len(connectors_data.connectors_transfer) len_expected += len(connectors_data.connectors_access) len_expected += len(connectors_data.connectors_egress) assert len(edges) == len_expected + # all variables are included assert list(edges.columns) == [ 'onode', 'dnode', 'ivt', 'walk', 'wait', 'transfer'] + + +def test_calculate_gc(mock_config): + edges = pd.DataFrame({ + 'ivt': [100, 200], + 'walk': [30, 10], + 'wait': [10, 5], + 'transfer': [0, 1] + }) + graph.add_gc(edges, mock_config) + assert list(edges['gc']) == [190, 835] + + +def test_get_shortest_distance_single(small_graph): + dists = graph.get_shortest_distances_single(small_graph, 0, [3, 2, 1, 0]) + expected = np.array([24, 20, 10, 0]) + assert dists[0] == 0 # the first value is the source + np.testing.assert_equal(dists[1:], expected) + + +def test_get_distance_matrix(small_graph_birectional): + distmat = graph.get_shortest_distances( + small_graph_birectional, [0, 1, 2], [1, 2]) + expected = np.array([ + [10, 20], + [0, 19], + [19, 0] + ]) + assert list(distmat.index) == [0, 1, 2] + assert list(distmat.columns) == [1, 2] + + np.testing.assert_equal(distmat.values, expected) From 
0bf9fd9b563f5380fccfd15ea90530b5af1b0727 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Tue, 12 Dec 2023 15:17:56 +0000 Subject: [PATCH 17/30] complete workflow --- .gitignore | 2 +- gtfs_skims/cli.py | 12 +++++-- gtfs_skims/graph.py | 83 +++++++++++++++++++++++++++++++++++++++++++-- tests/test_cli.py | 6 +++- tests/test_graph.py | 6 ++++ 5 files changed, 102 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 0cb40c9..56c75cc 100755 --- a/.gitignore +++ b/.gitignore @@ -40,5 +40,5 @@ mike-*.yml .ipynb_checkpoints sandbox.py -# tests/test_data/outputs/ +tests/test_data/outputs/ tests/test_data/outputs/*log \ No newline at end of file diff --git a/gtfs_skims/cli.py b/gtfs_skims/cli.py index 110072f..1632f43 100755 --- a/gtfs_skims/cli.py +++ b/gtfs_skims/cli.py @@ -5,6 +5,7 @@ from gtfs_skims.preprocessing import main as main_preprocessing from gtfs_skims.connectors import main as main_connectors +from gtfs_skims.graph import main as main_graph from gtfs_skims.utils import Config @@ -27,8 +28,15 @@ def run(config_path: str, output_directory_override: Optional[str] = None): config.path_outputs = output_directory_override steps = config.steps + gtfs_data = None + connectors_data = None + if 'preprocessing' in steps: - main_preprocessing(config=config) + gtfs_data = main_preprocessing(config=config) if 'connectors' in steps: - main_connectors(config=config) + connectors_data = main_connectors(config=config, data=gtfs_data) + + if 'graph' in steps: + main_graph(config=config, gtfs_data=gtfs_data, + connectors_data=connectors_data) diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index 3db94b2..68d75b5 100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -1,5 +1,6 @@ from functools import partial import multiprocessing +import os from typing import Optional from graph_tool import Graph @@ -72,7 +73,7 @@ def add_gc(edges: pd.DataFrame, config: Config) -> pd.DataFrame: # adding unweighted time as well edges['time'] = 
edges[['ivt', 'walk', 'wait']].sum(1) - return edges['gc'] + return edges def build_graph( @@ -104,6 +105,18 @@ def get_shortest_distances_single( max_dist: Optional[float] = None, attribute: str = 'gc' ) -> np.ndarray: + """Get shortest distances from a single origin. + + Args: + graph (Graph): GTFS graph. + onode (int): Source node. + dnodes (list[int]): Destination nodes. + max_dist (Optional[float], optional): Maximum search distance. Defaults to None. + attribute (str, optional): Edge weights attribute. Defaults to 'gc'. + + Returns: + np.ndarray: Shortest distances. The first value is the source node. + """ d = shortest_distance(graph, onode, dnodes, weights=graph.edge_properties[attribute], dense=False, max_dist=max_dist, directed=True) @@ -119,6 +132,20 @@ def get_shortest_distances( max_dist: Optional[float] = None, attribute: str = 'gc' ) -> pd.DataFrame: + """Get shortest distances from a set of origins to a set of destinations. + + Args: + graph (Graph): GTFS graph. + onode (int): Source nodes. + dnodes (list[int]): Destination nodes. + max_dist (Optional[float], optional): Maximum search distance. Defaults to None. + attribute (str, optional): Edge weights attribute. Defaults to 'gc'. + + Returns: + pd.DataFrame: Shortest distances matrix. + The dataframe indices are the origin nodes, + and the column indices are the destination nodes. 
+ """ n_cpus = multiprocessing.cpu_count() - 1 dist_wrapper = partial(get_shortest_distances_single, graph, dnodes=dnodes, max_dist=max_dist, attribute=attribute) @@ -135,5 +162,55 @@ def get_shortest_distances( return dists -def main(): - pass +def main( + config: Config, + gtfs_data: Optional[GTFSData] = None, + connectors_data: Optional[ConnectorsData] = None +) -> pd.DataFrame: + # read + logger = get_logger(os.path.join( + config.path_outputs, 'log_graph.log')) + + logger.info('Reading files...') + if gtfs_data is None: + gtfs_data = GTFSData.from_parquet(path=config.path_outputs) + if connectors_data is None: + connectors_data = ConnectorsData.from_parquet(path=config.path_outputs) + origins = pd.read_csv(config.path_origins, index_col=0) + destinations = pd.read_csv(config.path_destinations, index_col=0) + + # graph + logger.info('Building graph...') + edges = get_all_edges(gtfs_data, connectors_data) + edges = add_gc(edges=edges, config=config) + g = build_graph(edges=edges) + + # shortest paths + logger.info('Calculating shortest distances...') + origins['idx'] = range(len(origins)) + origins['idx'] += len(gtfs_data.stop_times) + destinations['idx'] = range(len(destinations)) + destinations['idx'] += (len(gtfs_data.stop_times)+len(origins)) + + onodes_graph = list(origins[origins['idx'].isin(edges['onode'])]['idx']) + dnodes_graph = list( + destinations[destinations['idx'].isin(edges['dnode'])]['idx']) + distmat = get_shortest_distances( + g, onodes=onodes_graph, dnodes=dnodes_graph) + + # map labels + distmat.index = distmat.index.map( + origins.reset_index().set_index('idx')['name'] + ) + distmat.columns = distmat.columns.map( + destinations.reset_index().set_index('idx')['name'] + ) + + # diagonal infilling + + # save + path = os.path.join(config.path_outputs, 'skims.parquet.gzip') + logger.info(f'Saving results to {path}...') + distmat.to_parquet(path, compression='gzip') + + return distmat diff --git a/tests/test_cli.py b/tests/test_cli.py index 
a8540fd..a0aa408 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -42,4 +42,8 @@ def test_run_steps_saves_outputs(tmpdir): for x in ['transfer', 'access', 'egress']: assert os.path.exists( os.path.join(tmpdir, f'connectors_{x}.parquet.gzip') - ) \ No newline at end of file + ) + + assert os.path.exists( + os.path.join(tmpdir, f'skims.parquet.gzip') + ) diff --git a/tests/test_graph.py b/tests/test_graph.py index 373e77f..a1be1e6 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -97,3 +97,9 @@ def test_get_distance_matrix(small_graph_birectional): assert list(distmat.columns) == [1, 2] np.testing.assert_equal(distmat.values, expected) + + +def test_correct_labels(config, gtfs_data_preprocessed, connectors_data, tmpdir): + config.path_outputs = tmpdir + distmat = graph.main(config=config, gtfs_data=gtfs_data_preprocessed, + connectors_data=connectors_data) From e3f1cbb7cfa7da0ab4e346911eb03627aaf9bfd8 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Tue, 12 Dec 2023 15:39:32 +0000 Subject: [PATCH 18/30] matrix expansion --- gtfs_skims/graph.py | 15 +++++++++------ tests/test_graph.py | 7 ++++++- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index 68d75b5..af8b862 100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -198,19 +198,22 @@ def main( distmat = get_shortest_distances( g, onodes=onodes_graph, dnodes=dnodes_graph) + # expand to the full OD space + distmat_full = pd.DataFrame( + np.inf, index=origins['idx'], columns=destinations['idx']) + distmat_full.loc[distmat.index, distmat.columns] = distmat.values + # map labels - distmat.index = distmat.index.map( + distmat_full.index = distmat_full.index.map( origins.reset_index().set_index('idx')['name'] ) - distmat.columns = distmat.columns.map( + distmat_full.columns = distmat_full.columns.map( destinations.reset_index().set_index('idx')['name'] ) - # diagonal infilling - # save path = 
os.path.join(config.path_outputs, 'skims.parquet.gzip') logger.info(f'Saving results to {path}...') - distmat.to_parquet(path, compression='gzip') + distmat_full.to_parquet(path, compression='gzip') - return distmat + return distmat_full diff --git a/tests/test_graph.py b/tests/test_graph.py index a1be1e6..6667e90 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -100,6 +100,11 @@ def test_get_distance_matrix(small_graph_birectional): def test_correct_labels(config, gtfs_data_preprocessed, connectors_data, tmpdir): + origins = pd.read_csv(config.path_origins, index_col=0) + destinations = pd.read_csv(config.path_destinations, index_col=0) config.path_outputs = tmpdir distmat = graph.main(config=config, gtfs_data=gtfs_data_preprocessed, - connectors_data=connectors_data) + connectors_data=connectors_data) + + assert list(distmat.index) == list(origins.index) + assert list(distmat.columns) == list(destinations.index) From 40ff44f405075e60febfea12d09f2c4fce064724 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Tue, 12 Dec 2023 23:07:25 +0000 Subject: [PATCH 19/30] docs --- docs/index.md | 6 +++++- docs/methodology.md | 24 +++++++++++++++++++++++ docs/run.md | 48 +++++++++++++++++++++++++++++++++++++++++++++ gtfs_skims/graph.py | 12 ++++++++---- mkdocs.yml | 2 ++ 5 files changed, 87 insertions(+), 5 deletions(-) create mode 100644 docs/methodology.md create mode 100644 docs/run.md diff --git a/docs/index.md b/docs/index.md index 8d4a99a..6010ddb 100755 --- a/docs/index.md +++ b/docs/index.md @@ -1 +1,5 @@ ---8<-- "README.md:docs" \ No newline at end of file +--8<-- "README.md:docs" + +# Argo + +Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. 
\ No newline at end of file diff --git a/docs/methodology.md b/docs/methodology.md new file mode 100644 index 0000000..ea87663 --- /dev/null +++ b/docs/methodology.md @@ -0,0 +1,24 @@ +# Methodology + +Argo calculates generalised time matrices between a set of origin and destination points. + +Generalised time is defined as follows: + +$$ +gc = ivt + \beta_{wait} \cdot wait\_time + \beta_{walk} \cdot walk\_time + \beta_{interchange\_penalty} \cdot n\_transfers +$$ + +Some example values for the leg component weights are: + +$$ +\beta_{wait} = \beta_{walk} = 2-3 +$$ + +and + +$$ +\beta_{\text{interchange\_penalty}} = 5 \text{ to } 10 \text{ minutes} +$$ + +Walk distance is calculated as the crow's fly distance between two points, multiplied by a factor specified in the config file (typically ~1.3). + diff --git a/docs/run.md b/docs/run.md new file mode 100644 index 0000000..bfe1eb6 --- /dev/null +++ b/docs/run.md @@ -0,0 +1,48 @@ +# Running Argo + +To run argo simply type this command on the command line: +``` +argo run +``` +, where is the path to the config yaml file. + +An example config file is shown below: +``` +paths: + path_gtfs: ./tests/test_data/iow-bus-gtfs.zip + path_outputs: ./tests/test_data/outputs + path_origins: ./tests/test_data/centroids.csv # path to the origin points + path_destinations: ./tests/test_data/centroids.csv # path to the destination points + +settings: + calendar_date : 20190515 # yyyymmdd | Date for filtering the GTFS file. + start_s : 32400 # sec | Start time of the journey. + end_s : 41400 # sec | Max end time of a journey. 
+ walk_distance_threshold : 2000 # m | Max walk distance in a leg + walk_speed : 4.5 # kph | Walking speed + crows_fly_factor : 1.3 # Conversion factor from euclidean to routed distances + max_transfer_time : 1800 # Max combined time of walking and waiting (sec) of a transfer + max_wait : 1800 # sec | Max wait time at a stop / leg + bounding_box : null + epsg_centroids: 27700 # coordinate system of the centroids file. Needs to be Cartesian and in meters. + weight_walk: 2 # value of walk time, ratio to in-vehicle time + weight_wait: 2 # value of wait time, ratio to in-vehicle time + penalty_interchange: 300 # seconds added to generalised cost for each interchange + +steps: + - preprocessing + - connectors + - graph +``` + +To run the example provided by the repo, use: +``` +argo run ./tests/test_data/config_demo.yaml +``` + +The time matrices will be saved in the `output_path` directory defined in the config file, in the `skims.parquet.gzip` file. An easy way to read the file is with pandas: +``` +import pandas as pd +df = pd.read_parquet('/skims.parquet.gzip') +df +``` \ No newline at end of file diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index af8b862..2965936 100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -192,11 +192,11 @@ def main( destinations['idx'] = range(len(destinations)) destinations['idx'] += (len(gtfs_data.stop_times)+len(origins)) - onodes_graph = list(origins[origins['idx'].isin(edges['onode'])]['idx']) - dnodes_graph = list( + onodes_scope = list(origins[origins['idx'].isin(edges['onode'])]['idx']) + dnodes_scope = list( destinations[destinations['idx'].isin(edges['dnode'])]['idx']) distmat = get_shortest_distances( - g, onodes=onodes_graph, dnodes=dnodes_graph) + g, onodes=onodes_scope, dnodes=dnodes_scope) # expand to the full OD space distmat_full = pd.DataFrame( @@ -211,9 +211,13 @@ def main( destinations.reset_index().set_index('idx')['name'] ) + # infill intra_zonal + distmat_full = distmat_full.\ + apply(lambda x: 
np.where(x.name == x.index, np.nan, x), axis=0) + # save path = os.path.join(config.path_outputs, 'skims.parquet.gzip') logger.info(f'Saving results to {path}...') - distmat_full.to_parquet(path, compression='gzip') + distmat_full.to_parquet(path, compression='gzip', index=True) return distmat_full diff --git a/mkdocs.yml b/mkdocs.yml index 1064385..235e7ca 100755 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -2,6 +2,8 @@ site_name: gtfs-skims nav: - Home: index.md - Installation: installation.md + - Running the tool: run.md + - Methodology: methodology.md - Contributing: contributing.md - Changelog: CHANGELOG.md - Reference: From ddc4dfd85db9bd719c4f997ea404dc008a6edf77 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Wed, 13 Dec 2023 09:36:41 +0000 Subject: [PATCH 20/30] docs --- docs/index.md | 2 +- gtfs_skims/graph.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/index.md b/docs/index.md index 6010ddb..50f4372 100755 --- a/docs/index.md +++ b/docs/index.md @@ -2,4 +2,4 @@ # Argo -Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. \ No newline at end of file +Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. By applying appropriate simplifications on the GTFS dataset, the library is able to calculate such matrices at scale: for example, it was possible to calculate an MSOA-to-MSOA matrix for England and Wales in ~1 hour (with a relatevile large machine). 
\ No newline at end of file diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index 2965936..1ed528e 100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -195,8 +195,10 @@ def main( onodes_scope = list(origins[origins['idx'].isin(edges['onode'])]['idx']) dnodes_scope = list( destinations[destinations['idx'].isin(edges['dnode'])]['idx']) + maxdist = config.end_s - config.start_s distmat = get_shortest_distances( - g, onodes=onodes_scope, dnodes=dnodes_scope) + g, onodes=onodes_scope, dnodes=dnodes_scope, + max_dist=maxdist) # expand to the full OD space distmat_full = pd.DataFrame( @@ -214,6 +216,7 @@ def main( # infill intra_zonal distmat_full = distmat_full.\ apply(lambda x: np.where(x.name == x.index, np.nan, x), axis=0) + distmat_full = distmat_full.map(lambda x: np.where(x>=maxdist, np.inf, x)) # save path = os.path.join(config.path_outputs, 'skims.parquet.gzip') From 4638c03e2844d7344f7026451403c67e15f2a158 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Wed, 13 Dec 2023 09:37:31 +0000 Subject: [PATCH 21/30] release date --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e2a5dc..7e5d2c2 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,7 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed -## [v0.1.0] - 2023-11-28 +## [v0.1.0] - 2023-12-13 Initial release. From 68ca4a78465de6ecef07210d6e9fd8e2f0b2d32a Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Wed, 13 Dec 2023 09:50:18 +0000 Subject: [PATCH 22/30] more docs --- docs/methodology.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/methodology.md b/docs/methodology.md index ea87663..d8261e7 100644 --- a/docs/methodology.md +++ b/docs/methodology.md @@ -22,3 +22,16 @@ $$ Walk distance is calculated as the crow's fly distance between two points, multiplied by a factor specified in the config file (typically ~1.3). 
+The library creates a graph representation of the GTFS dataset, where the edges represent vehicle movements or connections (access/egress/transfer legs). It then applies a shortest-paths algorithm, using generalised time as edge weights.
+
+To achieve high performance, the user can limit the search space by:
+* selecting a time scope and maximum travel time
+* selecting a specific day
+* selecting a maximum walk, wait and transfer time for legs
+* applying a spatial bounding box
+
+We further improve performance by:
+* using K-dimensional trees to organise spatial data
+* using the efficient graph-tool library to calculate shortest distances
+* parallelising the shortest distances calculation, and vectorising data transformation tasks
+* saving files to a compressed parquet format
\ No newline at end of file

From fd44fd1321df15ecd660d6bd81c654444f3fa92c Mon Sep 17 00:00:00 2001
From: "Theodore.Chatziioannou"
Date: Wed, 13 Dec 2023 10:11:09 +0000
Subject: [PATCH 23/30] remove typing.Self references

---
 gtfs_skims/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py
index 0f796c1..2f28821 100644
--- a/gtfs_skims/utils.py
+++ b/gtfs_skims/utils.py
@@ -4,7 +4,7 @@
 import logging
 import os
 from pathlib import Path
-from typing import Optional, Self
+from typing import Optional
 
 import yaml
 from zipfile import ZipFile
@@ -207,7 +207,7 @@ def __repr__(self) -> str:
 @dataclass
 class Data:
     @classmethod
-    def from_gtfs(cls, path_gtfs: str) -> Self:
+    def from_gtfs(cls, path_gtfs: str) -> Data:
         """Load GTFS tables from a standard zipped GTFS file.
 
         Args:
@@ -224,7 +224,7 @@ def from_gtfs(cls, path_gtfs: str) -> Self:
         return cls(**data)
 
     @classmethod
-    def from_parquet(cls, path: str) -> Self:
+    def from_parquet(cls, path: str) -> Data:
         """Construct class from pre-processed GTFS tables in Parquet format.
Args: From 4b3476485fedd1e45a355ecaab66784a9e955c74 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Wed, 13 Dec 2023 10:21:47 +0000 Subject: [PATCH 24/30] remove windows support --- .github/workflows/pr-ci.yml | 2 +- docs/installation.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-ci.yml b/.github/workflows/pr-ci.yml index a2aecaa..d26a9f3 100755 --- a/.github/workflows/pr-ci.yml +++ b/.github/workflows/pr-ci.yml @@ -16,7 +16,7 @@ jobs: test: strategy: matrix: - os: [windows-latest, ubuntu-latest, macos-latest] + os: [ubuntu-latest, macos-latest] py3version: ["9", "11"] fail-fast: false uses: arup-group/actions-city-modelling-lab/.github/workflows/python-install-lint-test.yml@main diff --git a/docs/installation.md b/docs/installation.md index 3c1d127..876e1f6 100755 --- a/docs/installation.md +++ b/docs/installation.md @@ -1,6 +1,8 @@ # Installation +Note: this library only supports Unix-based systems (ie Ubuntu/macOS). If you wish to use it on Windows please use the Windows Subsystem for Linux. 
+ ## Setting up a user environment As a `gtfs_skims` user, it is easiest to install using the [mamba](https://mamba.readthedocs.io/en/latest/index.html) package manager, as follows: From d32ed32e2b3659c34f353ed9e6941584626b5249 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 15 Dec 2023 09:06:08 +0000 Subject: [PATCH 25/30] pre-commit checks --- README.md | 5 + docs/index.md | 4 +- gtfs_skims/cli.py | 20 ++-- gtfs_skims/connectors.py | 176 ++++++++++++++++++------------------ gtfs_skims/graph.py | 166 ++++++++++++++++++---------------- gtfs_skims/preprocessing.py | 107 +++++++--------------- gtfs_skims/utils.py | 100 ++++---------------- gtfs_skims/variables.py | 28 +++--- requirements/base.txt | 5 +- tests/conftest.py | 12 +-- tests/test_cli.py | 29 +++--- tests/test_connectors.py | 61 +++++-------- tests/test_graph.py | 69 ++++++-------- tests/test_preprocessing.py | 42 ++++----- tests/test_utils.py | 20 ++-- 15 files changed, 359 insertions(+), 485 deletions(-) diff --git a/README.md b/README.md index d6ddf93..7753847 100755 --- a/README.md +++ b/README.md @@ -9,6 +9,11 @@ + +Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. +By applying appropriate simplifications on the GTFS dataset, the library is able to calculate such matrices at scale. +For example, it was possible to calculate an MSOA-to-MSOA matrix for England and Wales in ~1 hour (with a relatevile large machine). + ## Documentation For more detailed instructions, see our [documentation](https://arup-group.github.io/gtfs_skims/latest). diff --git a/docs/index.md b/docs/index.md index 50f4372..3909da0 100755 --- a/docs/index.md +++ b/docs/index.md @@ -2,4 +2,6 @@ # Argo -Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. 
By applying appropriate simplifications on the GTFS dataset, the library is able to calculate such matrices at scale: for example, it was possible to calculate an MSOA-to-MSOA matrix for England and Wales in ~1 hour (with a relatevile large machine). \ No newline at end of file +Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. +By applying appropriate simplifications on the GTFS dataset, the library is able to calculate such matrices at scale. +For example, it was possible to calculate an MSOA-to-MSOA matrix for England and Wales in ~1 hour (with a relatevile large machine). \ No newline at end of file diff --git a/gtfs_skims/cli.py b/gtfs_skims/cli.py index 1632f43..5c93373 100755 --- a/gtfs_skims/cli.py +++ b/gtfs_skims/cli.py @@ -1,11 +1,12 @@ """Console script for gtfs_skims.""" -import click from typing import Optional -from gtfs_skims.preprocessing import main as main_preprocessing +import click + from gtfs_skims.connectors import main as main_connectors from gtfs_skims.graph import main as main_graph +from gtfs_skims.preprocessing import main as main_preprocessing from gtfs_skims.utils import Config @@ -13,14 +14,12 @@ @click.group def cli(args=None): """Console script for gtfs_skims.""" - click.echo( - "Console script for Argo (gtfs_skims)." 
- ) + click.echo("Console script for Argo (gtfs_skims).") return 0 @cli.command() -@click.argument('config_path') +@click.argument("config_path") @click.option("--output_directory_override", default=None, help="override output directory") def run(config_path: str, output_directory_override: Optional[str] = None): config = Config.from_yaml(config_path) @@ -31,12 +30,11 @@ def run(config_path: str, output_directory_override: Optional[str] = None): gtfs_data = None connectors_data = None - if 'preprocessing' in steps: + if "preprocessing" in steps: gtfs_data = main_preprocessing(config=config) - if 'connectors' in steps: + if "connectors" in steps: connectors_data = main_connectors(config=config, data=gtfs_data) - if 'graph' in steps: - main_graph(config=config, gtfs_data=gtfs_data, - connectors_data=connectors_data) + if "graph" in steps: + main_graph(config=config, gtfs_data=gtfs_data, connectors_data=connectors_data) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index a3536fd..b611756 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -1,14 +1,14 @@ from __future__ import annotations -from dataclasses import dataclass -from functools import cached_property + import os +from functools import cached_property from typing import Optional import numpy as np -from scipy.spatial import KDTree import pandas as pd +from scipy.spatial import KDTree -from gtfs_skims.utils import Config, GTFSData, ConnectorsData, get_logger +from gtfs_skims.utils import Config, ConnectorsData, GTFSData, get_logger from gtfs_skims.variables import DATA_TYPE @@ -27,13 +27,13 @@ def query_pairs(coords: np.ndarray, radius: float) -> np.array: ids = coords[:, 2].argsort() dtree = KDTree(coords[ids]) - connectors = dtree.query_pairs(r=radius, output_type='ndarray', p=2) + connectors = dtree.query_pairs(r=radius, output_type="ndarray", p=2) return ids[connectors] class TransferConnectors: - """ Manages transfer connectors. 
""" + """Manages transfer connectors.""" def __init__(self, coords: np.ndarray, max_transfer_distance: float) -> None: self.coords = coords @@ -65,12 +65,12 @@ def walk(self) -> np.array: Returns: np.array: Distance from origin to destination point (on the xy axis). """ - walk = ((self.dcoords[:, :2]-self.ocoords[:, :2])**2).sum(1)**0.5 + walk = ((self.dcoords[:, :2] - self.ocoords[:, :2]) ** 2).sum(1) ** 0.5 return walk @cached_property def wait(self) -> np.array: - """Wait distance. It is calculated as the difference between timestamps (dz) + """Wait distance. It is calculated as the difference between timestamps (dz) and the distance required to walk to the destination. Returns: @@ -106,7 +106,7 @@ def filter_feasible_transfer(self, maxdist: float) -> None: Args: maxdist (float): Maximum transfer distance (walk+wait) """ - is_feasible = (self.wait > 0) & ((self.walk+self.wait) <= maxdist) + is_feasible = (self.wait > 0) & ((self.walk + self.wait) <= maxdist) self.filter(is_feasible) def filter_max_walk(self, max_walk: float) -> None: @@ -115,7 +115,7 @@ def filter_max_walk(self, max_walk: float) -> None: Args: max_walk (float): Max walk distance """ - cond = (self.walk <= max_walk) + cond = self.walk <= max_walk self.filter(cond) def filter_max_wait(self, max_wait: float) -> None: @@ -132,9 +132,7 @@ def filter_same_route(self, routes: np.ndarray) -> None: Args: routes (np.array): Route IDs array. Its indexing matches the self.coords table. 
""" - self.filter( - routes[self.ods[:, 0]] != routes[self.ods[:, 1]] - ) + self.filter(routes[self.ods[:, 0]] != routes[self.ods[:, 1]]) def filter_nearest_service(self, services: np.ndarray) -> None: """If a service can be accessed from a origin through multiple stops, @@ -150,8 +148,8 @@ def filter_nearest_service(self, services: np.ndarray) -> None: idx_sorted = transfer.argsort() # create origin-service combinations - order_o = int(np.floor(np.log10(services.max()))+1) - comb = (self.ods[:, 0]+1) * 10**order_o + services_d + order_o = int(np.floor(np.log10(services.max())) + 1) + comb = (self.ods[:, 0] + 1) * 10**order_o + services_d # get first instance of each origin-service combination # (which corresponds to the most efficient transfer) @@ -162,9 +160,7 @@ def filter_nearest_service(self, services: np.ndarray) -> None: def query_pairs_od( - coords_origins: np.ndarray, - coords_destinations: np.ndarray, - radius: float + coords_origins: np.ndarray, coords_destinations: np.ndarray, radius: float ) -> np.array: """Get origin-destination pairs between points, within a radius. 
@@ -179,26 +175,24 @@ def query_pairs_od( tree_origins = KDTree(coords_origins) tree_destinations = KDTree(coords_destinations) - ods = tree_origins.query_ball_tree( - tree_destinations, r=radius) + ods = tree_origins.query_ball_tree(tree_destinations, r=radius) # flatten - ods = np.column_stack([ - np.repeat(range(len(coords_origins)), list(map(len, ods))), - np.concatenate(ods) - ]).astype(DATA_TYPE) + ods = np.column_stack( + [np.repeat(range(len(coords_origins)), list(map(len, ods))), np.concatenate(ods)] + ).astype(DATA_TYPE) return ods class AccessEgressConnectors(TransferConnectors): - """ Connections between zones/endpoints and stops """ + """Connections between zones/endpoints and stops""" def __init__( - self, - coords_origins: np.ndarray, - coords_destinations: np.ndarray, - max_transfer_distance: float + self, + coords_origins: np.ndarray, + coords_destinations: np.ndarray, + max_transfer_distance: float, ) -> None: self.coords_origins = coords_origins self.coords_destinations = coords_destinations @@ -207,8 +201,7 @@ def __init__( if coords_origins.shape[1] == 3: radius += max_transfer_distance * (2**0.5) - self.ods = query_pairs_od(coords_origins, coords_destinations, - radius=radius) + self.ods = query_pairs_od(coords_origins, coords_destinations, radius=radius) @cached_property def ocoords(self) -> np.array: @@ -239,14 +232,13 @@ def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: Returns: np.ndarray: [origin id, destination id, walk time, wait time] """ - time_to_distance = config.walk_speed/3.6 # km/hr to meters + time_to_distance = config.walk_speed / 3.6 # km/hr to meters max_transfer_distance = config.max_transfer_time * time_to_distance max_wait_distance = config.max_wait * time_to_distance # get candidate connectors - coords = data.stop_times[['x', 'y', 'departure_s']].values - coords[:, :2] = coords[:, :2] * \ - config.crows_fly_factor # crow's fly transformation + coords = data.stop_times[["x", "y", 
"departure_s"]].values + coords[:, :2] = coords[:, :2] * config.crows_fly_factor # crow's fly transformation tc = TransferConnectors(coords, max_transfer_distance) # apply more narrow filters: @@ -262,23 +254,26 @@ def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: tc.filter_max_wait(max_wait_distance) # not same route - routes = data.stop_times['trip_id'].map( - data.trips.set_index('trip_id')['route_id'] - ).values + routes = data.stop_times["trip_id"].map(data.trips.set_index("trip_id")["route_id"]).values tc.filter_same_route(routes) # most efficient transfer to service - services = data.stop_times['trip_id'].map( - data.trips.set_index('trip_id')['service_id'] - ).values + services = data.stop_times["trip_id"].map(data.trips.set_index("trip_id")["service_id"]).values tc.filter_nearest_service(services) # construct array - arr = np.concatenate([ - tc.ods, # origin and destination index - (tc.walk/time_to_distance).reshape(-1, 1), # walk time (seconds) - (tc.wait/time_to_distance).reshape(-1, 1) # wait time (seconds) - ], axis=1).round(1).astype(DATA_TYPE) + arr = ( + np.concatenate( + [ + tc.ods, # origin and destination index + (tc.walk / time_to_distance).reshape(-1, 1), # walk time (seconds) + (tc.wait / time_to_distance).reshape(-1, 1), # wait time (seconds) + ], + axis=1, + ) + .round(1) + .astype(DATA_TYPE) + ) return arr @@ -289,25 +284,22 @@ def get_access_connectors(data: GTFSData, config: Config, origins: pd.DataFrame) Args: data (GTFSData): GTFS data object. config (Config): Config object. - destinations (pd.DataFrame): Origin coordinates dataframe. + destinations (pd.DataFrame): Origin coordinates dataframe. Must include 'x' and 'y' columns, providing the cartesian coordinates of the trip start points. 
Returns: np.ndarray: [origin id, destination id, walk time, wait time] """ - time_to_distance = config.walk_speed/3.6 # km/hr to meters + time_to_distance = config.walk_speed / 3.6 # km/hr to meters max_transfer_distance = config.max_transfer_time * time_to_distance max_wait_distance = config.max_wait * time_to_distance # get candidate connectors - coords_stops = data.stop_times[['x', 'y', 'departure_s']].values - coords_stops[:, :2] = coords_stops[:, :2] * \ - config.crows_fly_factor # crow's fly transformation - coords_origins = (origins[['x', 'y']] * config.crows_fly_factor).\ - assign(z=config.start_s).values + coords_stops = data.stop_times[["x", "y", "departure_s"]].values + coords_stops[:, :2] = coords_stops[:, :2] * config.crows_fly_factor # crow's fly transformation + coords_origins = (origins[["x", "y"]] * config.crows_fly_factor).assign(z=config.start_s).values - ac = AccessEgressConnectors( - coords_origins, coords_stops, max_transfer_distance) + ac = AccessEgressConnectors(coords_origins, coords_stops, max_transfer_distance) # more narrow filtering ac.filter_feasible_transfer(max_transfer_distance) @@ -316,11 +308,18 @@ def get_access_connectors(data: GTFSData, config: Config, origins: pd.DataFrame) if max_wait_distance < max_transfer_distance: ac.filter_max_wait(max_wait_distance) - arr = np.concatenate([ - ac.ods, # origin and destination index - (ac.walk/time_to_distance).reshape(-1, 1), # walk time (seconds) - (ac.wait/time_to_distance).reshape(-1, 1) # wait time (seconds) - ], axis=1).round(1).astype(DATA_TYPE) + arr = ( + np.concatenate( + [ + ac.ods, # origin and destination index + (ac.walk / time_to_distance).reshape(-1, 1), # walk time (seconds) + (ac.wait / time_to_distance).reshape(-1, 1), # wait time (seconds) + ], + axis=1, + ) + .round(1) + .astype(DATA_TYPE) + ) return arr @@ -331,29 +330,33 @@ def get_egress_connectors(data: GTFSData, config: Config, destinations: pd.DataF Args: data (GTFSData): GTFS data object. 
config (Config): Config object. - destinations (pd.DataFrame): Destination coordinates dataframe. + destinations (pd.DataFrame): Destination coordinates dataframe. Must include 'x' and 'y' columns, providing the cartesian coordinates of the trip ends. Returns: np.ndarray: [origin id, destination id, walk time, wait time] """ - time_to_distance = config.walk_speed/3.6 # km/hr to meters + time_to_distance = config.walk_speed / 3.6 # km/hr to meters # get candidate connectors - coords_stops = data.stop_times[['x', 'y']].values - coords_stops[:, :2] = coords_stops[:, :2] * \ - config.crows_fly_factor # crow's fly transformation - coords_destinations = ( - destinations[['x', 'y']] * config.crows_fly_factor).values - - ec = AccessEgressConnectors( - coords_stops, coords_destinations, config.walk_distance_threshold) - - arr = np.concatenate([ - ec.ods, # origin and destination index - (ec.walk/time_to_distance).reshape(-1, 1), # walk time (seconds) - np.array([0]*len(ec.ods)).reshape(-1, 1) # wait time = 0 - ], axis=1).round(1).astype(DATA_TYPE) + coords_stops = data.stop_times[["x", "y"]].values + coords_stops[:, :2] = coords_stops[:, :2] * config.crows_fly_factor # crow's fly transformation + coords_destinations = (destinations[["x", "y"]] * config.crows_fly_factor).values + + ec = AccessEgressConnectors(coords_stops, coords_destinations, config.walk_distance_threshold) + + arr = ( + np.concatenate( + [ + ec.ods, # origin and destination index + (ec.walk / time_to_distance).reshape(-1, 1), # walk time (seconds) + np.array([0] * len(ec.ods)).reshape(-1, 1), # wait time = 0 + ], + axis=1, + ) + .round(1) + .astype(DATA_TYPE) + ) return arr @@ -363,15 +366,14 @@ def main(config: Config, data: Optional[GTFSData] = None) -> ConnectorsData: Args: config (Config): Config object. - data (Optional[GTFSData], optional): GTFS data object. - If not provided, reads the stored parquet files from the outputs directory. + data (Optional[GTFSData], optional): GTFS data object. 
+ If not provided, reads the stored parquet files from the outputs directory. Defaults to None. Returns: ConnectorsData: Connectors object, holding the three output tables. """ - logger = get_logger(os.path.join( - config.path_outputs, 'log_connectors.log')) + logger = get_logger(os.path.join(config.path_outputs, "log_connectors.log")) if data is None: data = GTFSData.from_parquet(config.path_outputs) @@ -379,25 +381,25 @@ def main(config: Config, data: Optional[GTFSData] = None) -> ConnectorsData: destinations = pd.read_csv(config.path_destinations, index_col=0) # get feasible connections - logger.info('Getting transfer connectors...') + logger.info("Getting transfer connectors...") connectors_transfer = get_transfer_connectors(data, config) - logger.info('Getting access connectors...') + logger.info("Getting access connectors...") connectors_access = get_access_connectors(data, config, origins) - logger.info('Getting egress connectors...') + logger.info("Getting egress connectors...") connectors_egress = get_egress_connectors(data, config, destinations) # convert to dataframe - colnames = ['onode', 'dnode', 'walk', 'wait'] + colnames = ["onode", "dnode", "walk", "wait"] connectors_transfer = pd.DataFrame(connectors_transfer, columns=colnames) connectors_access = pd.DataFrame(connectors_access, columns=colnames) connectors_egress = pd.DataFrame(connectors_egress, columns=colnames) # offset IDs for endpoints - connectors_access['onode'] += len(data.stop_times) - connectors_egress['dnode'] += (len(data.stop_times)+len(origins)) + connectors_access["onode"] += len(data.stop_times) + connectors_egress["dnode"] += len(data.stop_times) + len(origins) # save - logger.info(f'Saving connectors to {config.path_outputs}...') + logger.info(f"Saving connectors to {config.path_outputs}...") connectors = ConnectorsData( connectors_transfer=connectors_transfer, connectors_access=connectors_access, diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index 1ed528e..3ce8af8 
100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -1,14 +1,14 @@ -from functools import partial import multiprocessing import os +from functools import partial from typing import Optional -from graph_tool import Graph -from graph_tool.topology import shortest_distance import numpy as np import pandas as pd +from graph_tool import Graph +from graph_tool.topology import shortest_distance -from gtfs_skims.utils import Config, GTFSData, ConnectorsData, get_logger +from gtfs_skims.utils import Config, ConnectorsData, GTFSData, get_logger def get_ivt_edges(stop_times: pd.DataFrame) -> pd.DataFrame: @@ -21,15 +21,22 @@ def get_ivt_edges(stop_times: pd.DataFrame) -> pd.DataFrame: np.ndarray: [origin id, destination id, in-vehicle time] """ edges_ivt = pd.Series(range(len(stop_times))) - trip_id = stop_times.reset_index()['trip_id'] - departures = stop_times.reset_index()['departure_s'] - - edges_ivt = pd.concat([ - edges_ivt, - edges_ivt.groupby(trip_id).shift(-1), - departures.groupby(trip_id).shift(-1) - departures, - ], axis=1).dropna().map(int) - edges_ivt.columns = ['onode', 'dnode', 'ivt'] + trip_id = stop_times.reset_index()["trip_id"] + departures = stop_times.reset_index()["departure_s"] + + edges_ivt = ( + pd.concat( + [ + edges_ivt, + edges_ivt.groupby(trip_id).shift(-1), + departures.groupby(trip_id).shift(-1) - departures, + ], + axis=1, + ) + .dropna() + .map(int) + ) + edges_ivt.columns = ["onode", "dnode", "ivt"] return edges_ivt @@ -44,12 +51,19 @@ def get_all_edges(gtfs_data: GTFSData, connectors_data: ConnectorsData) -> pd.Da Returns: pd.DataFrame: ['onode', 'dnode', 'ivt', 'walk', 'wait', 'transfer'] """ - edges = pd.concat([ - get_ivt_edges(gtfs_data.stop_times), - connectors_data.connectors_transfer.assign(transfer=1), - connectors_data.connectors_access, - connectors_data.connectors_egress, - ], axis=0).fillna(0).map(int) + edges = ( + pd.concat( + [ + get_ivt_edges(gtfs_data.stop_times), + 
connectors_data.connectors_transfer.assign(transfer=1), + connectors_data.connectors_access, + connectors_data.connectors_egress, + ], + axis=0, + ) + .fillna(0) + .map(int) + ) return edges @@ -58,28 +72,27 @@ def add_gc(edges: pd.DataFrame, config: Config) -> pd.DataFrame: """Calculate generalised time and add it as a column to the 'edges' table. Args: - edges (pd.DataFrame): Edges dataframe. Should include these columns: + edges (pd.DataFrame): Edges dataframe. Should include these columns: ['ivt', 'walk', 'wait', 'transfer'] config (Config): Config object. Returns: pd.DataFrame: Edges dataframe, with the generalised time ("gc") column included. """ - edges['gc'] = edges['ivt'] +\ - edges['walk'] * config.weight_walk +\ - edges['wait'] * config.weight_wait +\ - edges['transfer'] * config.penalty_interchange + edges["gc"] = ( + edges["ivt"] + + edges["walk"] * config.weight_walk + + edges["wait"] * config.weight_wait + + edges["transfer"] * config.penalty_interchange + ) # adding unweighted time as well - edges['time'] = edges[['ivt', 'walk', 'wait']].sum(1) + edges["time"] = edges[["ivt", "walk", "wait"]].sum(1) return edges -def build_graph( - edges: pd.DataFrame, - vars=['ivt', 'walk', 'wait', 'time', 'gc'] -) -> Graph: +def build_graph(edges: pd.DataFrame, vars=["ivt", "walk", "wait", "time", "gc"]) -> Graph: """Build a network graph from the edges table. 
Args: @@ -89,21 +102,17 @@ def build_graph( Returns: Graph: Connected GTFS graph """ - eprops = [(x, 'int') for x in vars] - g = Graph( - edges[['onode', 'dnode']+vars].values, - hashed=False, - eprops=eprops - ) + eprops = [(x, "int") for x in vars] + g = Graph(edges[["onode", "dnode"] + vars].values, hashed=False, eprops=eprops) return g def get_shortest_distances_single( - graph: Graph, - onode: int, - dnodes: list[int], - max_dist: Optional[float] = None, - attribute: str = 'gc' + graph: Graph, + onode: int, + dnodes: list[int], + max_dist: Optional[float] = None, + attribute: str = "gc", ) -> np.ndarray: """Get shortest distances from a single origin. @@ -117,20 +126,26 @@ def get_shortest_distances_single( Returns: np.ndarray: Shortest distances. The first value is the source node. """ - d = shortest_distance(graph, onode, dnodes, - weights=graph.edge_properties[attribute], dense=False, - max_dist=max_dist, directed=True) + d = shortest_distance( + graph, + onode, + dnodes, + weights=graph.edge_properties[attribute], + dense=False, + max_dist=max_dist, + directed=True, + ) d = np.concatenate([np.array([onode]), d]) return d def get_shortest_distances( - graph: Graph, - onodes: list[int], - dnodes: list[int], - max_dist: Optional[float] = None, - attribute: str = 'gc' + graph: Graph, + onodes: list[int], + dnodes: list[int], + max_dist: Optional[float] = None, + attribute: str = "gc", ) -> pd.DataFrame: """Get shortest distances from a set of origins to a set of destinations. @@ -147,8 +162,9 @@ def get_shortest_distances( and the column indices are the destination nodes. 
""" n_cpus = multiprocessing.cpu_count() - 1 - dist_wrapper = partial(get_shortest_distances_single, graph, dnodes=dnodes, - max_dist=max_dist, attribute=attribute) + dist_wrapper = partial( + get_shortest_distances_single, graph, dnodes=dnodes, max_dist=max_dist, attribute=attribute + ) with multiprocessing.Pool(n_cpus) as pool_obj: dists = pool_obj.map(dist_wrapper, onodes) @@ -165,13 +181,12 @@ def get_shortest_distances( def main( config: Config, gtfs_data: Optional[GTFSData] = None, - connectors_data: Optional[ConnectorsData] = None + connectors_data: Optional[ConnectorsData] = None, ) -> pd.DataFrame: # read - logger = get_logger(os.path.join( - config.path_outputs, 'log_graph.log')) + logger = get_logger(os.path.join(config.path_outputs, "log_graph.log")) - logger.info('Reading files...') + logger.info("Reading files...") if gtfs_data is None: gtfs_data = GTFSData.from_parquet(path=config.path_outputs) if connectors_data is None: @@ -180,47 +195,40 @@ def main( destinations = pd.read_csv(config.path_destinations, index_col=0) # graph - logger.info('Building graph...') + logger.info("Building graph...") edges = get_all_edges(gtfs_data, connectors_data) edges = add_gc(edges=edges, config=config) g = build_graph(edges=edges) # shortest paths - logger.info('Calculating shortest distances...') - origins['idx'] = range(len(origins)) - origins['idx'] += len(gtfs_data.stop_times) - destinations['idx'] = range(len(destinations)) - destinations['idx'] += (len(gtfs_data.stop_times)+len(origins)) - - onodes_scope = list(origins[origins['idx'].isin(edges['onode'])]['idx']) - dnodes_scope = list( - destinations[destinations['idx'].isin(edges['dnode'])]['idx']) + logger.info("Calculating shortest distances...") + origins["idx"] = range(len(origins)) + origins["idx"] += len(gtfs_data.stop_times) + destinations["idx"] = range(len(destinations)) + destinations["idx"] += len(gtfs_data.stop_times) + len(origins) + + onodes_scope = 
list(origins[origins["idx"].isin(edges["onode"])]["idx"]) + dnodes_scope = list(destinations[destinations["idx"].isin(edges["dnode"])]["idx"]) maxdist = config.end_s - config.start_s - distmat = get_shortest_distances( - g, onodes=onodes_scope, dnodes=dnodes_scope, - max_dist=maxdist) + distmat = get_shortest_distances(g, onodes=onodes_scope, dnodes=dnodes_scope, max_dist=maxdist) # expand to the full OD space - distmat_full = pd.DataFrame( - np.inf, index=origins['idx'], columns=destinations['idx']) + distmat_full = pd.DataFrame(np.inf, index=origins["idx"], columns=destinations["idx"]) distmat_full.loc[distmat.index, distmat.columns] = distmat.values # map labels - distmat_full.index = distmat_full.index.map( - origins.reset_index().set_index('idx')['name'] - ) + distmat_full.index = distmat_full.index.map(origins.reset_index().set_index("idx")["name"]) distmat_full.columns = distmat_full.columns.map( - destinations.reset_index().set_index('idx')['name'] + destinations.reset_index().set_index("idx")["name"] ) # infill intra_zonal - distmat_full = distmat_full.\ - apply(lambda x: np.where(x.name == x.index, np.nan, x), axis=0) - distmat_full = distmat_full.map(lambda x: np.where(x>=maxdist, np.inf, x)) + distmat_full = distmat_full.apply(lambda x: np.where(x.name == x.index, np.nan, x), axis=0) + distmat_full = distmat_full.map(lambda x: np.where(x >= maxdist, np.inf, x)) # save - path = os.path.join(config.path_outputs, 'skims.parquet.gzip') - logger.info(f'Saving results to {path}...') - distmat_full.to_parquet(path, compression='gzip', index=True) + path = os.path.join(config.path_outputs, "skims.parquet.gzip") + logger.info(f"Saving results to {path}...") + distmat_full.to_parquet(path, compression="gzip", index=True) return distmat_full diff --git a/gtfs_skims/preprocessing.py b/gtfs_skims/preprocessing.py index 419e82e..0299da6 100644 --- a/gtfs_skims/preprocessing.py +++ b/gtfs_skims/preprocessing.py @@ -2,8 +2,7 @@ import pyproj -from gtfs_skims.utils 
import ( - GTFSData, Config, get_weekday, ts_to_sec, get_logger) +from gtfs_skims.utils import Config, GTFSData, get_logger, get_weekday, ts_to_sec def filter_day(data: GTFSData, date: int) -> None: @@ -15,34 +14,18 @@ def filter_day(data: GTFSData, date: int) -> None: """ weekday = get_weekday(date) data.calendar = data.calendar[ - (data.calendar['start_date'] <= date) & - (data.calendar['end_date'] >= date) & - (data.calendar[weekday] == 1) + (data.calendar["start_date"] <= date) + & (data.calendar["end_date"] >= date) + & (data.calendar[weekday] == 1) ] - data.trips = data.trips[ - data.trips['service_id'].isin( - set(data.calendar['service_id']) - ) - ] + data.trips = data.trips[data.trips["service_id"].isin(set(data.calendar["service_id"]))] - data.routes = data.routes[ - data.routes['route_id'].isin( - set(data.trips['route_id']) - ) - ] + data.routes = data.routes[data.routes["route_id"].isin(set(data.trips["route_id"]))] - data.stop_times = data.stop_times[ - data.stop_times['trip_id'].isin( - set(data.trips['trip_id']) - ) - ] + data.stop_times = data.stop_times[data.stop_times["trip_id"].isin(set(data.trips["trip_id"]))] - data.stops = data.stops[ - data.stops['stop_id'].isin( - set(data.stop_times['stop_id']) - ) - ] + data.stops = data.stops[data.stops["stop_id"].isin(set(data.stop_times["stop_id"]))] def filter_time(data: GTFSData, start_time: int, end_time: int) -> None: @@ -54,29 +37,20 @@ def filter_time(data: GTFSData, start_time: int, end_time: int) -> None: end_time (int): End of the time window (seconds from midnight) """ # filter stop times - data.stop_times['departure_s'] = data.stop_times['departure_time'].apply( - ts_to_sec) - data.stop_times['arrival_s'] = data.stop_times['arrival_time'].apply( - ts_to_sec) + data.stop_times["departure_s"] = data.stop_times["departure_time"].apply(ts_to_sec) + data.stop_times["arrival_s"] = data.stop_times["arrival_time"].apply(ts_to_sec) data.stop_times = data.stop_times[ - (data.stop_times['arrival_s'] >= 
start_time) & - (data.stop_times['departure_s'] <= end_time) + (data.stop_times["arrival_s"] >= start_time) & (data.stop_times["departure_s"] <= end_time) ] # filter stops - data.stops = data.stops[data.stops['stop_id'].isin( - set(data.stop_times['stop_id']) - )] + data.stops = data.stops[data.stops["stop_id"].isin(set(data.stop_times["stop_id"]))] # filter trips - data.trips = data.trips[data.trips['trip_id'].isin( - set(data.stop_times['trip_id']) - )] + data.trips = data.trips[data.trips["trip_id"].isin(set(data.stop_times["trip_id"]))] # filter routes - data.routes = data.routes[data.routes['route_id'].isin( - set(data.trips['route_id']) - )] + data.routes = data.routes[data.routes["route_id"].isin(set(data.trips["route_id"]))] def add_coordinates(data: GTFSData, epsg: int = 27700) -> None: @@ -87,22 +61,18 @@ def add_coordinates(data: GTFSData, epsg: int = 27700) -> None: epsg (int): The target coordinate system """ transformer = pyproj.Transformer.from_crs( - pyproj.transformer.CRS('epsg:4326'), - pyproj.transformer.CRS(f'epsg:{epsg}'), always_xy=True) + pyproj.transformer.CRS("epsg:4326"), pyproj.transformer.CRS(f"epsg:{epsg}"), always_xy=True + ) - data.stops['x'], data.stops['y'] = transformer.transform( - data.stops['stop_lon'], data.stops['stop_lat'] + data.stops["x"], data.stops["y"] = transformer.transform( + data.stops["stop_lon"], data.stops["stop_lat"] ) - data.stops['x'] = data.stops['x'].round().map(int) - data.stops['y'] = data.stops['y'].round().map(int) + data.stops["x"] = data.stops["x"].round().map(int) + data.stops["y"] = data.stops["y"].round().map(int) - data.stop_times['x'] = data.stop_times['stop_id'].map( - data.stops.set_index('stop_id')['x'] - ) - data.stop_times['y'] = data.stop_times['stop_id'].map( - data.stops.set_index('stop_id')['y'] - ) + data.stop_times["x"] = data.stop_times["stop_id"].map(data.stops.set_index("stop_id")["x"]) + data.stop_times["y"] = data.stop_times["stop_id"].map(data.stops.set_index("stop_id")["y"]) def 
filter_bounding_box(data: GTFSData, xmin: int, xmax: int, ymin: int, ymax: int) -> None: @@ -116,28 +86,22 @@ def filter_bounding_box(data: GTFSData, xmin: int, xmax: int, ymin: int, ymax: i ymax (int): Max Northing """ data.stops = data.stops[ - (data.stops['x'] >= xmin) & - (data.stops['x'] <= xmax) & - (data.stops['y'] >= ymin) & - (data.stops['y'] <= ymax) + (data.stops["x"] >= xmin) + & (data.stops["x"] <= xmax) + & (data.stops["y"] >= ymin) + & (data.stops["y"] <= ymax) ] # filter stop times data.stop_times = data.stop_times[ - data.stop_times['stop_id'].isin( - set(list(data.stops['stop_id'])) - ) + data.stop_times["stop_id"].isin(set(list(data.stops["stop_id"]))) ] # filter trips - data.trips = data.trips[data.trips['trip_id'].isin( - set(data.stop_times['trip_id']) - )] + data.trips = data.trips[data.trips["trip_id"].isin(set(data.stop_times["trip_id"]))] # filter routes - data.routes = data.routes[data.routes['route_id'].isin( - set(data.trips['route_id']) - )] + data.routes = data.routes[data.routes["route_id"].isin(set(data.trips["route_id"]))] def main(config: Config) -> GTFSData: @@ -149,24 +113,23 @@ def main(config: Config) -> GTFSData: Returns: GTFSData: Pre-processed GTFS data object. 
""" - logger = get_logger(os.path.join( - config.path_outputs, 'log_preprocessing.log')) + logger = get_logger(os.path.join(config.path_outputs, "log_preprocessing.log")) - logger.info('Reading files...') + logger.info("Reading files...") data = GTFSData.from_gtfs(path_gtfs=config.path_gtfs) - logger.info('Time filtering..') + logger.info("Time filtering..") filter_day(data, config.calendar_date) filter_time(data, config.start_s, config.end_s) add_coordinates(data, epsg=config.epsg_centroids) if config.bounding_box is not None: - logger.info('Cropping to bounding box..') + logger.info("Cropping to bounding box..") filter_bounding_box(data, **config.bounding_box) - logger.info(f'Saving outputs at {config.path_outputs}') + logger.info(f"Saving outputs at {config.path_outputs}") data.save(config.path_outputs) - logger.info(f'Preprocessing complete.') + logger.info("Preprocessing complete.") return data diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py index 2f28821..4c9dda5 100644 --- a/gtfs_skims/utils.py +++ b/gtfs_skims/utils.py @@ -1,14 +1,15 @@ from __future__ import annotations -from dataclasses import dataclass -from datetime import datetime + import logging import os +from dataclasses import dataclass +from datetime import datetime from pathlib import Path from typing import Optional -import yaml from zipfile import ZipFile import pandas as pd +import yaml def ts_to_sec(x: str) -> int: @@ -20,8 +21,8 @@ def ts_to_sec(x: str) -> int: Returns: int: Seconds from midnight """ - s = [int(i) for i in x.split(':')] - return 3600*s[0]+60*s[1]+s[2] + s = [int(i) for i in x.split(":")] + return 3600 * s[0] + 60 * s[1] + s[2] def get_weekday(date: int) -> str: @@ -33,8 +34,8 @@ def get_weekday(date: int) -> str: Returns: str: Day name """ - weekday = datetime.strptime(str(date), '%Y%m%d') - weekday = datetime.strftime(weekday, '%A').lower() + weekday = datetime.strptime(str(date), "%Y%m%d") + weekday = datetime.strftime(weekday, "%A").lower() return weekday @@ -50,8 
+51,7 @@ def get_logger(path_output: Optional[str] = None) -> logging.Logger: logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) handler = logging.StreamHandler() - formatter = logging.Formatter( - '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") handler.setFormatter(formatter) if len(logger.handlers) == 0: logger.addHandler(handler) @@ -63,7 +63,7 @@ def get_logger(path_output: Optional[str] = None) -> logging.Logger: if not os.path.exists(parent_dir): os.makedirs(parent_dir) - file_handler = logging.FileHandler(path_output, mode='w') + file_handler = logging.FileHandler(path_output, mode="w") file_handler.setFormatter(formatter) logger.addHandler(file_handler) @@ -104,6 +104,7 @@ class Config: ``` """ + path_gtfs: str path_outputs: str path_origins: str @@ -133,82 +134,23 @@ def from_yaml(cls, path: str) -> Config: Returns: Config: Config object """ - with open(path, 'r') as f: + with open(path, "r") as f: config = yaml.safe_load(f) - config_flat = { - **config['paths'], - **config['settings'], - 'steps': config['steps'] - } + config_flat = {**config["paths"], **config["settings"], "steps": config["steps"]} return cls(**config_flat) def __repr__(self) -> str: - s = 'Config file\n' - s += '-'*50 + '\n' + s = "Config file\n" + s += "-" * 50 + "\n" s += yaml.dump(self.__dict__) return s -# @dataclass -# class GTFSData: -# calendar: pd.DataFrame -# routes: pd.DataFrame -# stops: pd.DataFrame -# stop_times: pd.DataFrame -# trips: pd.DataFrame - -# @classmethod -# def from_gtfs(cls, path_gtfs: str) -> GTFSData: -# """Load GTFS tables from a standard zipped GTFS file. - -# Args: -# path_gtfs (str): Path to a zipped GTFS dataset. - -# Returns: -# GTFSData: GTFS data object. 
-# """ -# data = {} -# with ZipFile(path_gtfs, 'r') as zf: -# for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: -# with zf.open(f'{name}.txt') as f: -# data[name] = pd.read_csv(f, low_memory=False) -# return cls(**data) - -# @classmethod -# def from_parquet(cls, path: str) -> GTFSData: -# """Construct class from pre-processed GTFS tables in Parquet format. - -# Args: -# path (str): Path to tables. - -# Returns: -# GTFSData: GTFS data object. -# """ -# data = {} -# for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: -# data[name] = pd.read_parquet( -# os.path.join(path, f'{name}.parquet.gzip')) -# return cls(**data) - -# def save(self, path_outputs: str) -> None: -# """Export all tables in zipped parquet format. - -# Args: -# path_outputs (str): Directory to save outputs. -# """ -# if not os.path.exists(path_outputs): -# os.makedirs(path_outputs) - -# for k, v in self.__dict__.items(): -# v.to_parquet(os.path.join( -# path_outputs, f'{k}.parquet.gzip'), compression='gzip') - - @dataclass class Data: @classmethod def from_gtfs(cls, path_gtfs: str) -> Data: - """Load GTFS tables from a standard zipped GTFS file. + """Load GTFS tables from a standard zipped GTFS file. Args: path_gtfs (str): Path to a zipped GTFS dataset. @@ -217,9 +159,9 @@ def from_gtfs(cls, path_gtfs: str) -> Data: GTFSData: GTFS data object. 
""" data = {} - with ZipFile(path_gtfs, 'r') as zf: + with ZipFile(path_gtfs, "r") as zf: for name in cls.__annotations__.keys(): - with zf.open(f'{name}.txt') as f: + with zf.open(f"{name}.txt") as f: data[name] = pd.read_csv(f, low_memory=False) return cls(**data) @@ -235,8 +177,7 @@ def from_parquet(cls, path: str) -> Data: """ data = {} for name in cls.__annotations__.keys(): - data[name] = pd.read_parquet( - os.path.join(path, f'{name}.parquet.gzip')) + data[name] = pd.read_parquet(os.path.join(path, f"{name}.parquet.gzip")) return cls(**data) def save(self, path_outputs: str) -> None: @@ -249,8 +190,7 @@ def save(self, path_outputs: str) -> None: os.makedirs(path_outputs) for k, v in self.__dict__.items(): - v.to_parquet(os.path.join( - path_outputs, f'{k}.parquet.gzip'), compression='gzip') + v.to_parquet(os.path.join(path_outputs, f"{k}.parquet.gzip"), compression="gzip") @dataclass diff --git a/gtfs_skims/variables.py b/gtfs_skims/variables.py index c6ded4e..fd70251 100644 --- a/gtfs_skims/variables.py +++ b/gtfs_skims/variables.py @@ -6,17 +6,17 @@ # source: https://developers.google.com/transit/gtfs/reference#routestxt # and https://developers.google.com/transit/gtfs/reference/extended-route-types ROUTE_TYPES = { - 0: 'tram', # Tram, Streetcar, Light rail. - 1: 'underground', # Subway, Metro. - 2: 'rail', # Rail. Used for intercity or long-distance travel. - 3: 'bus', # Bus. Used for short- and long-distance bus routes. - 4: 'ferry', # Ferry. Used for short- and long-distance boat service. - 5: 'cable', - 6: 'cable aerial', - 7: 'furnicular', # Funicular. Any rail system designed for steep inclines. - 11: 'trolley', # Trolleybus. - 12: 'monorail', # Monorail. - 200: 'coach', # Coach Service - 401: 'undergound', # Metro Service - 402: 'underground', # Underground Service -} \ No newline at end of file + 0: "tram", # Tram, Streetcar, Light rail. + 1: "underground", # Subway, Metro. + 2: "rail", # Rail. Used for intercity or long-distance travel. 
+ 3: "bus", # Bus. Used for short- and long-distance bus routes. + 4: "ferry", # Ferry. Used for short- and long-distance boat service. + 5: "cable", + 6: "cable aerial", + 7: "furnicular", # Funicular. Any rail system designed for steep inclines. + 11: "trolley", # Trolleybus. + 12: "monorail", # Monorail. + 200: "coach", # Coach Service + 401: "undergound", # Metro Service + 402: "underground", # Underground Service +} diff --git a/requirements/base.txt b/requirements/base.txt index b1ccdb9..4c3de05 100755 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,8 +1,9 @@ -# this dependency exists so that the base file is not empty -# it was chosen since it is a dependency that is included in any python environment already +click fastparquet graph-tool numpy pandas pyproj +pyyaml +yaml zipp \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 824ee6e..b2486e7 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,9 +12,9 @@ def test_content(response): import pytest -from gtfs_skims.utils import Config, GTFSData, ConnectorsData +from gtfs_skims.utils import Config, ConnectorsData, GTFSData -TEST_DATA_DIR = os.path.join(Path(__file__).parent, 'test_data') +TEST_DATA_DIR = os.path.join(Path(__file__).parent, "test_data") @pytest.fixture @@ -29,19 +29,19 @@ def response(): @pytest.fixture def config(): - return Config.from_yaml(os.path.join(TEST_DATA_DIR, 'config_demo.yaml')) + return Config.from_yaml(os.path.join(TEST_DATA_DIR, "config_demo.yaml")) @pytest.fixture def gtfs_data(): - return GTFSData.from_gtfs(os.path.join(TEST_DATA_DIR, 'iow-bus-gtfs.zip')) + return GTFSData.from_gtfs(os.path.join(TEST_DATA_DIR, "iow-bus-gtfs.zip")) @pytest.fixture def gtfs_data_preprocessed(): - return GTFSData.from_parquet(os.path.join(TEST_DATA_DIR, 'outputs')) + return GTFSData.from_parquet(os.path.join(TEST_DATA_DIR, "outputs")) @pytest.fixture def connectors_data(): - return ConnectorsData.from_parquet(os.path.join(TEST_DATA_DIR, 
'outputs')) + return ConnectorsData.from_parquet(os.path.join(TEST_DATA_DIR, "outputs")) diff --git a/tests/test_cli.py b/tests/test_cli.py index a0aa408..9e61d4a 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -6,7 +6,7 @@ from gtfs_skims import cli -TEST_DATA_DIR = os.path.join(Path(__file__).parent, 'test_data') +TEST_DATA_DIR = os.path.join(Path(__file__).parent, "test_data") def test_command_line_interface(): @@ -19,8 +19,7 @@ def test_command_line_interface(): assert ( "Console script for gtfs_skims.\n\nOptions:\n " "--version Show the version and exit.\n " - "--help Show this message and exit.\n" - in help_result.output + "--help Show this message and exit.\n" in help_result.output ) @@ -28,22 +27,20 @@ def test_run_steps_saves_outputs(tmpdir): runner = CliRunner() result = runner.invoke( cli.cli, - ['run', os.path.join(TEST_DATA_DIR, 'config_demo.yaml'), - '--output_directory_override', tmpdir] + [ + "run", + os.path.join(TEST_DATA_DIR, "config_demo.yaml"), + "--output_directory_override", + tmpdir, + ], ) assert result.exit_code == 0 - for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: - assert os.path.exists( - os.path.join(tmpdir, f'{x}.parquet.gzip') - ) + for x in ["calendar", "routes", "stops", "stop_times", "trips"]: + assert os.path.exists(os.path.join(tmpdir, f"{x}.parquet.gzip")) - for x in ['transfer', 'access', 'egress']: - assert os.path.exists( - os.path.join(tmpdir, f'connectors_{x}.parquet.gzip') - ) + for x in ["transfer", "access", "egress"]: + assert os.path.exists(os.path.join(tmpdir, f"connectors_{x}.parquet.gzip")) - assert os.path.exists( - os.path.join(tmpdir, f'skims.parquet.gzip') - ) + assert os.path.exists(os.path.join(tmpdir, "skims.parquet.gzip")) diff --git a/tests/test_connectors.py b/tests/test_connectors.py index f47fe9a..e18b27f 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -1,12 +1,11 @@ -from collections import defaultdict import itertools import os +from collections 
import defaultdict import numpy as np import pytest -import unittest.mock as mock -from gtfs_skims import connectors, preprocessing +from gtfs_skims import connectors @pytest.fixture() @@ -28,17 +27,17 @@ def find_index(coords, x, y, z): def get_valid_points(coords, source_idx, max_trasfer_dist): dcoords = coords - coords[source_idx] - walk = (dcoords[:, :2]**2).sum(1)**0.5 # euclidean distance on xy + walk = (dcoords[:, :2] ** 2).sum(1) ** 0.5 # euclidean distance on xy wait = dcoords[:, 2] - walk - is_valid = (wait > 0) & ((walk+wait) <= max_trasfer_dist) + is_valid = (wait > 0) & ((walk + wait) <= max_trasfer_dist) return is_valid -@pytest.mark.parametrize('source', [(0, 0, 0), (2.5, 2.5, 2.5), (-2.5, 0, 2.5)]) +@pytest.mark.parametrize("source", [(0, 0, 0), (2.5, 2.5, 2.5), (-2.5, 0, 2.5)]) def test_query_all_valid_included(points, source): - """ All valid points are included in the query results """ + """All valid points are included in the query results""" source_idx = find_index(points, *source) maxdist = 10 is_valid = get_valid_points(points, source_idx, maxdist) @@ -50,9 +49,9 @@ def test_query_all_valid_included(points, source): assert is_valid[dest].sum() == is_valid.sum() -@pytest.mark.parametrize('source', [(0, 0, 0), (2.5, 2.5, 2.5), (-2.5, 0, 2.5)]) +@pytest.mark.parametrize("source", [(0, 0, 0), (2.5, 2.5, 2.5), (-2.5, 0, 2.5)]) def test_query_all_included_valid(points, source): - """ All results from the query are valid """ + """All results from the query are valid""" source_idx = find_index(points, *source) maxdist = 10 is_valid = get_valid_points(points, source_idx, maxdist) @@ -91,15 +90,13 @@ def get_o_service_transfers(conn, services_d): transfer_times = conn.wait + conn.walk d = defaultdict(list) for i in range(len(services_d)): - d[(conn.ods[i, 0], services_d[i]) - ].append(transfer_times[i]) + d[(conn.ods[i, 0], services_d[i])].append(transfer_times[i]) return d def test_filter_nearest_service(transfer_connectors): np.random.seed(0) - 
services = np.random.randint( - 0, 2, size=transfer_connectors.coords.shape[0]) + services = np.random.randint(0, 2, size=transfer_connectors.coords.shape[0]) services_d = services[transfer_connectors.ods[:, 1]] # for every origin-service pair there are multiple connections @@ -131,36 +128,24 @@ def test_get_transfer_array(gtfs_data_preprocessed, config): def test_get_od_pairs(): ods = connectors.query_pairs_od( - np.array([[0, 0], [1, 1]]), - np.array([[0.5, 0.5], [2, 1], [2, 2]]), - radius=1 + np.array([[0, 0], [1, 1]]), np.array([[0.5, 0.5], [2, 1], [2, 2]]), radius=1 ) - expected = np.array([ - [0, 0], - [1, 0], - [1, 1] - ]) + expected = np.array([[0, 0], [1, 0], [1, 1]]) np.testing.assert_equal(ods, expected) def test_get_od_walk(): egress = connectors.AccessEgressConnectors( - np.array([[0, 0], [1, 1]]), - np.array([[0.5, 0.5], [2, 1], [2, 2]]), - max_transfer_distance=1 + np.array([[0, 0], [1, 1]]), np.array([[0.5, 0.5], [2, 1], [2, 2]]), max_transfer_distance=1 ) walk = egress.walk - expected = np.array([ - (2*0.5**2)**0.5, (2*0.5**2)**0.5, 1 - ]) + expected = np.array([(2 * 0.5**2) ** 0.5, (2 * 0.5**2) ** 0.5, 1]) np.testing.assert_almost_equal(walk, expected) def test_convert_distance_3d(): access = connectors.AccessEgressConnectors( - np.array([[0, 0, 0]]), - np.array([[1, 1, 1]]), - max_transfer_distance=1 + np.array([[0, 0, 0]]), np.array([[1, 1, 1]]), max_transfer_distance=1 ) assert len(access.ods) == 1 # radius has been adjusted to 3D space @@ -185,17 +170,15 @@ def test_indices_are_offset(config, gtfs_data_preprocessed, tmpdir): config.path_outputs = tmpdir conn = connectors.main(config=config, data=gtfs_data_preprocessed) stop_time_ids = list(range(len(gtfs_data_preprocessed.stop_times))) - assert all(np.isin(conn.connectors_access['dnode'], stop_time_ids)) - assert all(np.isin(conn.connectors_egress['onode'], stop_time_ids)) - assert np.isin(conn.connectors_access['onode'], stop_time_ids).sum() == 0 - assert 
np.isin(conn.connectors_egress['dnode'], stop_time_ids).sum() == 0 - assert conn.connectors_access['onode'].max() < conn.connectors_egress['dnode'].min() + assert all(np.isin(conn.connectors_access["dnode"], stop_time_ids)) + assert all(np.isin(conn.connectors_egress["onode"], stop_time_ids)) + assert np.isin(conn.connectors_access["onode"], stop_time_ids).sum() == 0 + assert np.isin(conn.connectors_egress["dnode"], stop_time_ids).sum() == 0 + assert conn.connectors_access["onode"].max() < conn.connectors_egress["dnode"].min() def test_main_saves_outputs(config, gtfs_data_preprocessed, tmpdir): config.path_outputs = tmpdir connectors.main(config=config, data=gtfs_data_preprocessed) - for x in ['transfer', 'access', 'egress']: - assert os.path.exists( - os.path.join(tmpdir, f'connectors_{x}.parquet.gzip') - ) + for x in ["transfer", "access", "egress"]: + assert os.path.exists(os.path.join(tmpdir, f"connectors_{x}.parquet.gzip")) diff --git a/tests/test_graph.py b/tests/test_graph.py index 6667e90..009daea 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -1,10 +1,9 @@ -import os +from unittest.mock import Mock -from graph_tool import Graph import numpy as np import pandas as pd import pytest -from unittest.mock import Mock +from graph_tool import Graph from gtfs_skims import graph @@ -20,34 +19,26 @@ def mock_config(mocker): @pytest.fixture() def small_graph() -> Graph: - edges = pd.DataFrame({ - 'onode': [0, 0, 1, 2], - 'dnode': [1, 2, 3, 3], - 'gc': [10, 20, 15, 4] - }) - return graph.build_graph(edges, vars=['gc']) + edges = pd.DataFrame({"onode": [0, 0, 1, 2], "dnode": [1, 2, 3, 3], "gc": [10, 20, 15, 4]}) + return graph.build_graph(edges, vars=["gc"]) @pytest.fixture() def small_graph_birectional() -> Graph: - edges = pd.DataFrame({ - 'onode': [0, 0, 1, 2, 1, 2, 3, 3], - 'dnode': [1, 2, 3, 3, 0, 0, 1, 2], - 'gc': [10, 20, 15, 4, 10, 20, 15, 4] - }) - return graph.build_graph(edges, vars=['gc']) + edges = pd.DataFrame( + { + "onode": [0, 0, 1, 2, 1, 
2, 3, 3], + "dnode": [1, 2, 3, 3, 0, 0, 1, 2], + "gc": [10, 20, 15, 4, 10, 20, 15, 4], + } + ) + return graph.build_graph(edges, vars=["gc"]) def test_get_ivt_times(): - stop_times = pd.DataFrame({ - 'trip_id': [0, 1, 0, 1], - 'departure_s': [100, 105, 120, 150] - }) + stop_times = pd.DataFrame({"trip_id": [0, 1, 0, 1], "departure_s": [100, 105, 120, 150]}) ivt_edges = graph.get_ivt_edges(stop_times) - expected = np.array([ - [0, 2, 20], - [1, 3, 45] - ]) + expected = np.array([[0, 2, 20], [1, 3, 45]]) np.testing.assert_equal(ivt_edges.values, expected) @@ -55,27 +46,23 @@ def test_get_all_edges(gtfs_data_preprocessed, connectors_data): edges = graph.get_all_edges(gtfs_data_preprocessed, connectors_data) # all connections are included - len_expected = len(gtfs_data_preprocessed.stop_times) - \ - gtfs_data_preprocessed.stop_times['trip_id'].nunique() + len_expected = ( + len(gtfs_data_preprocessed.stop_times) + - gtfs_data_preprocessed.stop_times["trip_id"].nunique() + ) len_expected += len(connectors_data.connectors_transfer) len_expected += len(connectors_data.connectors_access) len_expected += len(connectors_data.connectors_egress) assert len(edges) == len_expected # all variables are included - assert list(edges.columns) == [ - 'onode', 'dnode', 'ivt', 'walk', 'wait', 'transfer'] + assert list(edges.columns) == ["onode", "dnode", "ivt", "walk", "wait", "transfer"] def test_calculate_gc(mock_config): - edges = pd.DataFrame({ - 'ivt': [100, 200], - 'walk': [30, 10], - 'wait': [10, 5], - 'transfer': [0, 1] - }) + edges = pd.DataFrame({"ivt": [100, 200], "walk": [30, 10], "wait": [10, 5], "transfer": [0, 1]}) graph.add_gc(edges, mock_config) - assert list(edges['gc']) == [190, 835] + assert list(edges["gc"]) == [190, 835] def test_get_shortest_distance_single(small_graph): @@ -86,13 +73,8 @@ def test_get_shortest_distance_single(small_graph): def test_get_distance_matrix(small_graph_birectional): - distmat = graph.get_shortest_distances( - small_graph_birectional, 
[0, 1, 2], [1, 2]) - expected = np.array([ - [10, 20], - [0, 19], - [19, 0] - ]) + distmat = graph.get_shortest_distances(small_graph_birectional, [0, 1, 2], [1, 2]) + expected = np.array([[10, 20], [0, 19], [19, 0]]) assert list(distmat.index) == [0, 1, 2] assert list(distmat.columns) == [1, 2] @@ -103,8 +85,9 @@ def test_correct_labels(config, gtfs_data_preprocessed, connectors_data, tmpdir) origins = pd.read_csv(config.path_origins, index_col=0) destinations = pd.read_csv(config.path_destinations, index_col=0) config.path_outputs = tmpdir - distmat = graph.main(config=config, gtfs_data=gtfs_data_preprocessed, - connectors_data=connectors_data) + distmat = graph.main( + config=config, gtfs_data=gtfs_data_preprocessed, connectors_data=connectors_data + ) assert list(distmat.index) == list(origins.index) assert list(distmat.columns) == list(destinations.index) diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py index acba58e..c3b7a9d 100644 --- a/tests/test_preprocessing.py +++ b/tests/test_preprocessing.py @@ -1,24 +1,21 @@ import os -from pathlib import Path -import pytest from gtfs_skims import preprocessing def test_filter_date(gtfs_data): - a = 1 assert 14 in gtfs_data.calendar.service_id.values preprocessing.filter_day(gtfs_data, 20180507) assert list(gtfs_data.calendar.service_id) == [14] - assert set(gtfs_data.trips['service_id']) == set([14]) + assert set(gtfs_data.trips["service_id"]) == set([14]) def test_filter_time(gtfs_data): - start_time = 9*3600 - end_time = 10*3600 + start_time = 9 * 3600 + end_time = 10 * 3600 preprocessing.filter_time(gtfs_data, start_time, end_time) - assert gtfs_data.stop_times['arrival_s'].min() >= start_time - assert gtfs_data.stop_times['departure_s'].max() <= end_time + assert gtfs_data.stop_times["arrival_s"].min() >= start_time + assert gtfs_data.stop_times["departure_s"].max() <= end_time def test_projected_coords_within_bounds(gtfs_data): @@ -27,10 +24,10 @@ def 
test_projected_coords_within_bounds(gtfs_data): xmin, ymin = 423104, 69171 xmax, ymax = 471370, 101154 - assert gtfs_data.stops['x'].min() > xmin - assert gtfs_data.stops['x'].max() < xmax - assert gtfs_data.stops['y'].min() > ymin - assert gtfs_data.stops['y'].max() < ymax + assert gtfs_data.stops["x"].min() > xmin + assert gtfs_data.stops["x"].max() < xmax + assert gtfs_data.stops["y"].min() > ymin + assert gtfs_data.stops["y"].max() < ymax def test_within_bounding_box(gtfs_data): @@ -39,21 +36,18 @@ def test_within_bounding_box(gtfs_data): # filter for Cowes xmin, ymin = 447477, 92592 xmax, ymax = 451870, 96909 - assert gtfs_data.stops['x'].min() < xmin - preprocessing.filter_bounding_box( - gtfs_data, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax) + assert gtfs_data.stops["x"].min() < xmin + preprocessing.filter_bounding_box(gtfs_data, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax) - assert gtfs_data.stops['x'].min() > xmin - assert gtfs_data.stops['x'].max() < xmax - assert gtfs_data.stops['y'].min() > ymin - assert gtfs_data.stops['y'].max() < ymax + assert gtfs_data.stops["x"].min() > xmin + assert gtfs_data.stops["x"].max() < xmax + assert gtfs_data.stops["y"].min() > ymin + assert gtfs_data.stops["y"].max() < ymax def test_run_preprocessing_demo(config, tmpdir): - path_outputs = os.path.join(tmpdir, 'outputs') + path_outputs = os.path.join(tmpdir, "outputs") config.path_outputs = path_outputs preprocessing.main(config) - for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: - assert os.path.exists( - os.path.join(path_outputs, f'{x}.parquet.gzip') - ) + for x in ["calendar", "routes", "stops", "stop_times", "trips"]: + assert os.path.exists(os.path.join(path_outputs, f"{x}.parquet.gzip")) diff --git a/tests/test_utils.py b/tests/test_utils.py index 5585b62..eba5713 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -6,32 +6,30 @@ def test_parse_timestamp(): - assert utils.ts_to_sec('00:00:00') == 0 - assert utils.ts_to_sec('10:01:01') == 
36061 + assert utils.ts_to_sec("00:00:00") == 0 + assert utils.ts_to_sec("10:01:01") == 36061 def test_get_logger(tmpdir): - logger = utils.get_logger(os.path.join(tmpdir, 'logs', 'log.log')) - logger.info('test') + logger = utils.get_logger(os.path.join(tmpdir, "logs", "log.log")) + logger.info("test") def test_weekday(): - assert utils.get_weekday(20231201) == 'friday' + assert utils.get_weekday(20231201) == "friday" def test_load_config(config): - 'path_gtfs' in config.__dict__ + "path_gtfs" in config.__dict__ def test_load_gtfs(gtfs_data): - for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + for x in ["calendar", "routes", "stops", "stop_times", "trips"]: assert isinstance(getattr(gtfs_data, x), pd.DataFrame) def test_cache_gtfs(gtfs_data, tmpdir): gtfs_data.save(tmpdir) gtfs_cached = utils.GTFSData.from_parquet(tmpdir) - for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: - pd.testing.assert_frame_equal( - getattr(gtfs_data, x), getattr(gtfs_cached, x) - ) + for x in ["calendar", "routes", "stops", "stop_times", "trips"]: + pd.testing.assert_frame_equal(getattr(gtfs_data, x), getattr(gtfs_cached, x)) From ce08a8e89d696a7a08aad4fe43fc02cd83b3f1c7 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 15 Dec 2023 09:16:38 +0000 Subject: [PATCH 26/30] linting --- gtfs_skims/cli.py | 3 +-- gtfs_skims/connectors.py | 10 +++++++--- gtfs_skims/utils.py | 3 ++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gtfs_skims/cli.py b/gtfs_skims/cli.py index 5c93373..e170fb0 100755 --- a/gtfs_skims/cli.py +++ b/gtfs_skims/cli.py @@ -13,8 +13,7 @@ @click.version_option(package_name="gtfs_skims") @click.group def cli(args=None): - """Console script for gtfs_skims.""" - click.echo("Console script for Argo (gtfs_skims).") + """Console script for Argo (gtfs_skims).""" return 0 diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index b611756..77d574c 100644 --- a/gtfs_skims/connectors.py +++ 
b/gtfs_skims/connectors.py @@ -18,7 +18,7 @@ def query_pairs(coords: np.ndarray, radius: float) -> np.array: has always greater z coordinate than the origin point. Args: - coords (np.array): Point coordinates (x, y, z) + coords (np.ndarray): Point coordinates (x, y, z) radius (float): Maximum distance between points Returns: @@ -33,9 +33,13 @@ def query_pairs(coords: np.ndarray, radius: float) -> np.array: class TransferConnectors: - """Manages transfer connectors.""" - def __init__(self, coords: np.ndarray, max_transfer_distance: float) -> None: + """Manages transfer connectors. + + Args: + coords (np.ndarray): Point coordinates (x, y, z) + max_transfer_distance (float): Maximum distance between points + """ self.coords = coords radius = max_transfer_distance * (2**0.5) self.ods = query_pairs(coords, radius=radius) diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py index 4c9dda5..b673141 100644 --- a/gtfs_skims/utils.py +++ b/gtfs_skims/utils.py @@ -2,6 +2,7 @@ import logging import os +from abc import ABC from dataclasses import dataclass from datetime import datetime from pathlib import Path @@ -147,7 +148,7 @@ def __repr__(self) -> str: @dataclass -class Data: +class Data(ABC): @classmethod def from_gtfs(cls, path_gtfs: str) -> Data: """Load GTFS tables from a standard zipped GTFS file. 
From c1486f0b7c120c58ac95afef026b7676f917701e Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 15 Dec 2023 09:30:13 +0000 Subject: [PATCH 27/30] fix tests --- tests/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 9e61d4a..4f60664 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -17,7 +17,7 @@ def test_command_line_interface(): help_result = runner.invoke(cli.cli, ["--help"]) assert help_result.exit_code == 0 assert ( - "Console script for gtfs_skims.\n\nOptions:\n " + "Console script for Argo (gtfs_skims).\n\nOptions:\n " "--version Show the version and exit.\n " "--help Show this message and exit.\n" in help_result.output ) From 7f048b4004d517634b5f7fc4639545e76d091a42 Mon Sep 17 00:00:00 2001 From: Bryn Pickering <17178478+brynpickering@users.noreply.github.com> Date: Fri, 15 Dec 2023 11:39:50 +0000 Subject: [PATCH 28/30] Fix for github.com/arup-group/genet/issues/213 --- gtfs_skims/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gtfs_skims/__init__.py b/gtfs_skims/__init__.py index 24d1f27..36ba3aa 100755 --- a/gtfs_skims/__init__.py +++ b/gtfs_skims/__init__.py @@ -1,5 +1,8 @@ """Top-level module for gtfs_skims.""" +import pyproj __author__ = """Theodore-Chatziioannou""" __email__ = "Theodore.Chatziioannou@arup.com" __version__ = "0.1.0" + +pyproj.network.set_network_enabled(False) From 7d97c33ef36dc3cbd3c205123f1562c18384fec9 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 15 Dec 2023 11:44:56 +0000 Subject: [PATCH 29/30] remove tests init file --- tests/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/__init__.py diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 From e90e2a3904de2335eedd42e6928161f5022d236b Mon Sep 17 00:00:00 2001 From: Bryn Pickering <17178478+brynpickering@users.noreply.github.com> Date: Fri, 15 Dec 2023 16:53:42 
+0000 Subject: [PATCH 30/30] Clean up text duplication --- README.md | 9 ++++----- docs/index.md | 8 +------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 7753847..87cfe61 100755 --- a/README.md +++ b/README.md @@ -7,13 +7,12 @@ [![Daily CI Build](https://github.com/arup-group/gtfs_skims/actions/workflows/daily-scheduled-ci.yml/badge.svg)](https://github.com/arup-group/gtfs_skims/actions/workflows/daily-scheduled-ci.yml) [![Documentation](https://github.com/arup-group/gtfs_skims/actions/workflows/pages/pages-build-deployment/badge.svg?branch=gh-pages)](https://arup-group.github.io/gtfs_skims) - - - -Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. -By applying appropriate simplifications on the GTFS dataset, the library is able to calculate such matrices at scale. +Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. +By applying appropriate simplifications on the GTFS dataset, the library is able to calculate such matrices at scale. For example, it was possible to calculate an MSOA-to-MSOA matrix for England and Wales in ~1 hour (with a relatevile large machine). + + ## Documentation For more detailed instructions, see our [documentation](https://arup-group.github.io/gtfs_skims/latest). diff --git a/docs/index.md b/docs/index.md index 3909da0..8d4a99a 100755 --- a/docs/index.md +++ b/docs/index.md @@ -1,7 +1 @@ ---8<-- "README.md:docs" - -# Argo - -Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. -By applying appropriate simplifications on the GTFS dataset, the library is able to calculate such matrices at scale. -For example, it was possible to calculate an MSOA-to-MSOA matrix for England and Wales in ~1 hour (with a relatevile large machine). \ No newline at end of file +--8<-- "README.md:docs" \ No newline at end of file