From f10d1297c11e369a30f00a6c2556836422ba3138 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 1 Dec 2023 09:28:25 +0000 Subject: [PATCH 01/30] add some test data --- tests/test_data/config_demo.yaml | 30 ++++++++++++++++++++++++++++++ tests/test_data/iow-bus-gtfs.zip | Bin 0 -> 26630 bytes 2 files changed, 30 insertions(+) create mode 100644 tests/test_data/config_demo.yaml create mode 100755 tests/test_data/iow-bus-gtfs.zip diff --git a/tests/test_data/config_demo.yaml b/tests/test_data/config_demo.yaml new file mode 100644 index 0000000..66db500 --- /dev/null +++ b/tests/test_data/config_demo.yaml @@ -0,0 +1,30 @@ +paths: + path_gtfs: /mnt/efs/otp/gtfs_transfers/iow-bus-gtfs.zip + path_outputs: /mnt/efs/otp/gtfs_transfers/skims_iow + path_zone_centroids: /mnt/efs/zones/msoa_centroids.geojson # path to zone centroids to skim + + +settings: + calendar_date : 20190515 # yyyymmdd | Date for filtering the GTFS file. + start_s : 32400 # sec | Start time of the journey. + end_s : 41400 # sec | Max end time of a journey. 
+ walk_distance_threshold : 2000 # m | Max walk distance in a leg + walk_speed : 4.5 # kph | Walking speed + crows_fly_factor : 1.3 # Conversion factor from euclidean to routed distances + max_transfer_time : 1800 # Max combined time of walking and waiting (sec) + k : 500 # max nearest neighbours when calculating distances + max_wait : 1800 # sec | Max wait time at a stop + bounding_box : null + +steps: + - preprocessing + - connectors + - graph + +# Settings for AWS batch (Optional) +aws_job_definition: + image: "815306348607.dkr.ecr.eu-west-1.amazonaws.com/ukpop:latest" + memory: 32000 + vcpus: 4 + job_definition_name: "gtfs-ukpop11" + job_queue: "general_purpose_queue" \ No newline at end of file diff --git a/tests/test_data/iow-bus-gtfs.zip b/tests/test_data/iow-bus-gtfs.zip new file mode 100755 index 0000000000000000000000000000000000000000..1bcb7ae093b0d7d6cbdc8f95f9727cef26ac61e7 GIT binary patch literal 26630 zcmb?@by$>5^eCNzONpRJihv?Wh%_uMEg~hM(%s!5pu~boEad_&DJYFJqM%4GAT2E* z-F;^l_4~ei@9%l;ANP-$eczcgr_Y%)@4mYl%D8ycSXfv@SoU%VwFy=`-birAgpGwo z0zMyEnAtz^y6Nias&!XIy+gSus!K()S2MU|lhQ?tqP92lGF}cFzfd`s~o|e7coA$3I4y4)xuLm-lIt zYR#-PRtrl;s8sdKi3|c|I|7bhUD@H+J=MG-Dwn`GfV$j)HMJnohuml(=Vhbart2 zjV5lDx=LxUK;^qs=9S6Rzji3A7KxfRwNR^1r8hn<*eAxa zB~}fR$UW4JyR$%z_!)7P)77Nq8cT?|^Z8WU_tF==(v8~trl&9UzI{-WmSkBn_`~M$ z=eM)OQ3**BYh?O{af5;gat1oBfTb$D_H~~}8uSx&Cb%2%9B(xriuaY3WFiu^-#7{l z`{m2IwS*Ve@&%|Cc8kCk=q5B`4<7Lv7FRubsQ#GT*4AG^QqW}cp7-Wsy<0bt&>Y35 zHi-$BzCTg-8)7==KXEh@&3JDeHBDuL{rJ=B-BF>$U#szjRK_)F!w2EUQk-*lYxLU- zML51)PGh)2#`Cf#fb+$@2eIFZdEWA;OWc=X}W35(6HS+CEfz=>09OKyN zuN$`&Jxi6l4?ZUzz$O1{7&YRu2X7Rxu!@ecvB-hBU0fX;ja{wm&P@F(t*Pip%zkzk z_nWfkv9WHvw)>djgBO)V^e>DAcwd-`q=}ureCwhZS#NeP`aGHc*pZ<@!Y7%Yc2U`-k9eB4A(gbR)Lmc%s4oWZU@kfDw#Ko-C~f%$}}gobGve8Vv-T zY@Z$v8lNr)WSn*_tTCRRR0bTjZb+WC9vT`4By1mR4P>|(67VOwsoEUtTJ4v{)SDR) zY;M(^Y^u#}7SEq1c(YuOBg{;9&-+|=WP8Y$RNMIAtCz#9Sww!N@%9c6%lz~2s;uLZ zc{f9pGJlQ8U7i-a?I$}TVw8n)rC433H#ktTBvkd&SlTb14|{53EBR_L(97-P_PGkv 
zRAb2{AG&XIzPHWWh34T2O)qN#80W((9!DHoh$b}MpZZYV;kxO5L~VqN+-mmT9x?a+ zu*m;*F4=y`pQ3m@d7vxgM!U&ZLx$q#-#Y}qs(*Tt$ti2aEhKKZZI?f1$5lP{YGENZ zHgA;_YK1~eL*?n*n%(5@Q(!*W+R=9!7)glcyA{v z$7kC_{6mh<2j-h@NXY6yOl9@wYXbddyxZJf87($~Ey}BlX5$5}$4klwzUL_~d^ad| zLt>4%X>G({-SOY>p;N&&8hpV;u(!pRLZ*dlacM^A2MK;!UL%ddB2l(By{cAFL5=Z~ zEXh+;**RpzB#ygF*j5k1j92ibvHAk9RKGz91!DW-$Y}5QX9EkP_re*E1g>rlN}jL3 z5t6q$-cR^tOW()<2AI|ty4GhWd}>-2h=j~OyYgkr;G`CHkpk#Kr{QX!lU~NX+v=Qy zHs`MPxiAXFFvCSkMhugS+IhTEEEfff26>X}+Yi;vMNOha4zFzfw{jijv-^rJ9K7BQU{mVU;e zt^I4#Pcr(br>e%sY!8m6Klj|#id2p*UD(`Mz!Nisi%96Vt7bURL+z{eFBK1+`>r1R zpxYUK%?atMe5CV>(ebNn0`zZ-Js@`m-6MbK(BoD1>$I zc?jJ9KF={0VnJd1pPMhT+r6>V)9&OJ5NrBooG3s$3!MwYQcC7ZAWPCsu^#CJU^ty=;L#9XxaK-km4Q{BJ!s*a{Lgm+&>!3VU`a> zd^UK2;xBL6`u$k*mtQi#r}{@WuEP|kl>CXPoI5e2a+NK39)l8o4@p)q1}Yb z$!*;J!{N75#YU8cTUf}L>#p99$D$@AJ&WNr3!C}sg99HlBu7;<0!FP33AAwAv@th) z*!5Qh)HD0fd-RQIPkbM)d;iwIDg|4=pv4#7+qZBq_;G71`N^7*(KP><{twCC=-03Z zT3MqNOLihE>Vt;a!GO0*K1t~k?fSI@DiA9h=ADb)-hFI}s&)!CUSz(g50P1 z(%Zm-XQ2 z;YH06C-Dfo-rS4NLRNSk#mhjoYrgM}`hH=4Kdl@Chk{8qa^EkSTE(~gzhJ*#&?@1V zKD??x%tfKwPvs$wdz$gLh|L!k6YFI@mYBMp76U-#?Dfx zU;wnZcd=mx)-ASIap9xy7vYi{Zg)<1J5dn^2NXq(+c(|VgSu#LUfIoVyqcMor*mTso=vuA4Ky?K_}%qsH#fJ zKUSGhCx9(RhZ;fEG6UyY$RLBhLu#T9%X4h^)fRN?xV1B{~oG zb?atv!=COfckeHk4C(JG&j-x`+4Fd ztY41mjaap2U!Gnw!Ql<`#Bs+_!dFqX+u|_6;xJi|Zv1fLOwMk$TPcpj5z#eyxe0p0oz6j5tx@3eFgs<7QRXc)kXYsMS9+^?|{`A>g#yt z@PHDmifMQuDMlZu0QtfMch_zl=^91*YVHlU>qcSq-NufBAbN@1?-SRS|ZMHx2=GwyiVA z9_MLnwm!)=o(k3jq&PO!>*%^)BuQ9>%04&*fj9|UY0LFGi>qPZ9${x;yyiLj5y5{xPo^#=IrKyq2_71DKyhX6~;)1GQPLYB4B`L}FSDISJZ`*7}-}{vFm*eXWJF{-s(5cu3 zN1jH1AE@9%@`ou-5Bgo*%Yz|5r(6&clJT9J1r_-8NN1;>s-Cmw;Ma55k z`AN;P%^2Ka_w$XZvSxjQZgvF^MM`b0%L1UievXj|I>c96p>9yD{>B@gg^K#v= ztE%96(F2Db8tk!XE&R2YR|AArHXNGXVhSI}sbu z*W*HXN{1_ztJJ*6fi*^0@87RkwRn52zef}61F7$EQPx`jaZy257LM%*UlQe7bPUex zn0(GaFpbg_+$PBa`Z#|B%wuh_&K~4uIp)OV=W+9jaLM|E#Hq?FG8|EMH{+^0pczg8 z=lfn56Aae>S1Uhf7C0hUco@fG0#db zFZ@{GAX?0YUt2GTA!#ap=$`yp6$m0Z_5+y(6=2XtD?hpotgcCj6(I>0$w!XA>cOCU 
z#VbORsc77Xa=*xcj|hjEsw=#@lhE`_Ut%#AayfUXPm?*H5?B%)IvSB>eCu=$kZwgjO-CbV>+LN9XWo`A3L1JRK8sAFB#8AGDF;`b0)=((T z$s$=3jm^r!+cH?lis?-gfg3VtvowGmfISL`Q0zg;&5u7CDF)E#2K-;<*V(E^&-`?5FR(&RV?TbHu=KRhprrD2A&k z)-F9Jey@IPY%!NoRiPAkoh-U%%gq|<*Q7-C`~qlM{FWCuh{azu_w~e_$^0{y@^Wr^ z3>VNg>I^EE^5Hm8Zse*0?&w_BN)Ly-1WBOC2k~zTlXn@$#2BthF^r|pWv#vSo>muu zcb-(GEB3!^N*{MA9HZ?I`jO}jhYDz0abJm8E_>h#)OqS z?qlPs8CP9|NcVa9cQuB7#$gliT>> zVck!!acfFW9woK`>@VJJkFN~&7v2doE!XK$N)+B=eY<=)EcKUm{e=n%^+Mw-0!%3^ zBZ1leyB^_={FU(DL%V(L@f(>6EWOltcGcqV4d+4x%C8p-M$hiFxO2Yj{smJudLc*7 zY1%Cox9q00wET$So(_Vu<6^>~Yny?=$?dogWD{C3Iw6WyT{rxEmvB-hxpWZzbSlz! zpZD`T_oZ|}ky*N*U-BI_t5h6-s#Pq-6_s*4W8hLUo+?Y8tDH6ZrF<=@-DxSg_|v-PVtqGUD~b@J3f5JC-wycw?|gz z#~I}g)M~R`OVj+*eS@ENqn^lO^GfDA+R&A4bTc9C_)*ZJx}Q-y0^`x z2E~;P;La4gDlpZ7Pm4xVTRv+KWsNA@ccOwnwJj*~b)lkXQn5t+x{?6!m_}aT$)Q>y zCWH7z3ZW75yg$Yd^MSIqB_uS}diSsiIGRJVkVnS`XB-Yk-UdL&_0>R5?YNv! zhy=1+D|+J_>rjARViJrHrE6R1;K4(Yte&4u7*$(CiCsqn8SBA=XKo=DVB9<*LvDMpZlaPe2r_|LE_MWktUCG7=c#pWpyR~SYnETN zRe%}{iZ^bne`6Dn&98^`*TdMsQq?%X8;nin9Z$Sb*LQ&4wC+~vryh0zhWK4G-tz0M zp-7p0quE;sitIK7tAILSDs9*OMhIsIFVI>_#VCOXnw3}!M4k>E6}@zRMXdzzhgukL z74br5H2JD~%h#YUx`{G*oXYo8l`QEus!W;yfs!|7yv0VBDe0Zm4)Ddq!79@sV-+n? 
zClyP)o>-?H4YQPPS4Hg^V89a|pa*N2&%hJ8-xeRb03S#g82sqo3KY8WpvC=w8zFqY z((oFW00Tu~x2x$1*hHzGaK>Ltre+Fr-W-4V*mkfh8WX zwq=W`5rTrv2IXWU@bkb8!I{QFD~@;zng$*K9p6l7K&?%#5D9Z=AKO# zL9yx?M$o1bCJ#INL|^}DbF%E$1%hR{1RQ@8I3rTn-579&=nq1zn+}RG43ny3ud0+{M2hU`H0qLpVvycG5Mk_5oDdiuc_vO+l;d7Gj!GccNW3YhQDxwll zOqOdp$l{{SL`wIj{c!d0Q?nEao{ioXMo_RQg_&<(z$a}Y={QL3cE!sHUm65qU3$C8 zfcl2yZZip4$BTE*gJ(SqvT17p7G)kQaHzHi96Sm$cLTJWlTnKxz-qIhrT*%cEh*?6 z88nh7mcJW4%5Ya|F zl&FKM`fVMJ!v(12U4DBThoE1>&M$6_4UUsELluWJHVLH3?e^DG0c-+*U3o<2g&S9h zKTs<%PZn&6AB7Y+t=PE`k)W~)H3MsY2k!=f(>P2hH}5g;99i)F88U0@Ys2N0Qe_jR z9WNaKA9>GwyDqhyM%PNK+C>=pjKDV6RL*h1gGUF(aeA;GNjy0%0Z83Hi2IzT3#BR~ zn~_2GFEH7Ag+NlZk@GyF3{6~6?~@YN7G)CVh5dFlg?Etn^oC!qc%;nWt4s!`@p9rW zWB1U#V=>|sA%XeAS&mYZpXowGO;APm`d_X@q|8J6C4I~{LcceX5NQ z>l&af=I|WsBJrrCEGQ~`3WGkZM%fty}n!uv}7o~&*OkqVzX&C>(oXt89xLDrma6eeaC1F>Z^p;V;TOtG5QizD6NC;%i z@oozLI_cDp+XT!k+XPPJgO@~D^c{Y}Eb9<5ncD=?&~1YCn88bUkzj&N-vPpAgqaL# zldY46v32AOG0TiIDb`|IH*=4-Ot|b4S#angS(-=L-@y>3b#HXTjf=#5qOWDb_W-@$ zsL#;fcYvIF)?rWs=8XUW&pPI}^Kr%e+fQSN1^{Qe4iJ`y4nHGI>JY3)-XV9wm&l;9 zi)5|<&HoeIXpDavnhsjVr{s|Xm+*-DFNv&LVkXZ7p8~@Fw>>3aUm}~jTr31h&J_ZA zT*y^1cbw_uOte2Demszf%f!wc#c5d<4TEqypo=lz97twl5gfVxiLwv`jrw==z$}@y zyHkDFC6VBQPhUSC5TFaP%6e>I9qaVaV>N6GLw(ngCd&pbqPRZ3By5Y6bwR(WmO~*quY}tU?+Nz~Xc$2H0)E<7gkOAOnBBOw$ zX?wegdyrq-r3hQV#+l*b=N#1Pq#=eorXH;00ACMn?}~hFKn^5yiA5*}lSig}bFiAd zL*Un(57n5*AJSG=FI@`!`K;UET-_{L`L_F=r|z=6m_|YogJ7VDLNH9WYB-U%YB00h zx}W;-RvByNtumRjb|0p3j%L;K91YtQ8BsQR8T`oBZt}>_8Z1<|?!(T8e-B}XrJ=XV zHe)_(NKf6`;wKm`zP_aXbGR;AY1e7_SD7Hf+XH&a* z1OmL6MOT+~2>?D$#r}km#h;0^8>4+&`UG0Vk7*e%A1zy0?YA5FVwpP71a|(qGO%Cu z+>dpsM>dr0w0Fd>|wR7f1Kv9Byk4dvR(bU z6BVMrrb3>wExUqDD#22FQ6WUSiZxSO7v>3BfmuQ>uw4nok0gA=LiKm(Y?wM!boFc+ z=o|wgJ%s_OhgCD(P!uEbh+qu;Bz(3SOjXH1rFD;{F(7E9k>BtpUHy;7ww;!DocvBi zkBi@sHnYWK+~&Xkf>vpDhtSi2ZCr8|JX(s}nYMab<^7oM+fD0?#L*jZYsg%ssjPGJ zg85gUm5D_o*8O5~6Rq4IyGf~5G0NPM9xM2;E3Af0dicTotX9g(sGKP zx|*uE&IsO>Ygu!VY&R-uj9-ICd+MvtjOoh8VRfA^N36CpT$g5W8=7;FOndk$Bkl7R 
zD|U1$a>ExZ#%>rV>`u0C>}Qme&u^6|CKMq#+s3(UD{3~Y_1x%G{2q-sNDkkWY}m=f zwQ_IkzdJI&LF3Wj_ldj4HiPj%EF-`V9QurOsJ~Q9cV)4H2n0%s6oEiA-cFO zNzYbGcZZFAKI)Ofdz7PgN^7!3QRe(Y?g=BjEtb}eH6Ka9g%;T5-F9DM-T)ZuBz1Wu%ehM zZbL-1h`|LHQpeg_k{&^@ZoN2~JbX<)SNihs#`+^V#D(4KrXK90(HhTvw08*+_A7&r zzk;vOccL{y+MEx+d>dYYaw~pmH%V_(&HZ#pLXhU=JrPPj9&tO3v(kmiTDquhC^>jE zlwQ#eAbzHcAP9CipO)7%Mj{9^`~XKX2_Xoa1>9D?-CVnfAlR@6Kn#yVKQId`WmxdP z`amiO8%0&~<5v?4!g|jx%E1GG7!BN>*DxXV+@Sk(Y~Z;6R)e(JBMO78xw;aBN+^BX z_@8w$pGj5gp3zuK-(j-)+8#i)_>?+&WhZOJFgJ+pWgZGO*b8_;Q>hUeA`*mU3Yn8A zdLRhyrHFVS3B<0Wq>NP4M71cu1-YgW8uth0kzoKHIT}VEaeLyjl>Q2I?3o}7%G$k+ z1I5IFaxl)7c2ueNLl8(K{12gI!O-KKF!~k*cL24ulS7YzYSks6*8G+8(6Ldu+=+UP zknW9q9#rtl+H-S}qoMk>1`j;QLBL(r1!w-3@tx|WLuO8CB=ar#2tE50MJ7b z{)D0QL}3REkj1AF8V$HmvQX$TIni&}VB2SRpk%Tk!8|Cb{BxAXx%iW!bEJY0?s&k| z86ZsN^K(GiQR)CX2FvUjqqSyIs=v1XXBu+cI}H~6o3gO%m@mRUf(XM**?zr7hRy0g z#%hM*SJhnUcX!?$K*?}$-}ArX1)peFiUqDzyTuxL4aEei&Me?Y1>yO`9;~P-qyA&* z+!WFNYW_doR1eIU@zjgjdfon2#kOAVWXHX6dWf~OzMeA~5GvQjg6lER`N=p8NaVg? zZJZkEw^ZX} z3N#G|gd1rlC&m-d{T7NE$nmSUzg`0up^*c81Jeu1hKk^$edBMh51UX+t3vCf=k*T| zS6m@2wARvlF&JY5w7?_K{S0h<3Xr`FtJe_9wshSM`!^1H#hyzp{~h@)w`{JKRBn*v zrXRxpRIiBWyle+K9Y$2N|3BfwL_r!RB07P;-zl4og>tWOdl&r66MQl?EI{P^yzLG= zl@mN*qN*k?_pGdfWrWd>Q!iddI|8Hw8}0U;pwDbL4}23OkzwGpm}yLp4upA(7XJ36 zC7e^heaJ|Fit_jr(H34!0#@fh31ifV?#bzR(B6l=0Bm?g&7&Ia^=_C*LiZpk%mGvX zdqf7+0i5KFR?l1wG;L6_y?mH10+^dfkeI~HDGUzGjqbxfR6$jPp@t6;?7-X^q55MO zb7yH3fQ-5dB?IQ3A`y(dQJEozc#u5MxH0B20FI@RTluf7H;(bvI((3cSy2B>BQWd!he$ue zfB_)@Vgvm^$Q6xqFWnRPfXRjc8}M{*(tnJ-eF1`YEHo1!@dHp6C>jW)eA>mM^#ZQH z`Mw_reE9{sr(RqHDj;#&{-4u*0S}6)WVHUj={~m$O6EY~Mn+W5#2qn!@!r9|{hwT4 zgmHb)gX{}H(eC)C|GOvtAAxO%*%Q$H|1GdT5TJ!d_kRNWpOL?TZSy;@B?4X@ZnF}m zgL6cdMu>?P3U$Z_g4^O!F8GpuQ9hdn`WCs9MPUsQTa#5?rUF$3y8JuePE=l1`Sa~R zCB)N1(p!qEb$$TAz0Pm7kxK5a5y7UcJ(PQ#`)2N0`EX9m`s*^-3Sk#y`>BCzkj3Gu zqwcqWjT#RD-XN+0`B?A6u=rZ2Vvt6`5dIaAmOXz&!a?ak-==&p3MPooi)mtZ01(~Q zB*3$!Z_8Zd<`$+vS%jJ9b|Qo|EdYoA(KJZFfdGXKn8S4a?iB3iZGo6t%|hw%_~tsnH}--=Dp{ghhdqBTT8IQAWDHL3 
zWc{oB`q@ZT-?FR5ZbU5I0<|VzwxCj-mZCx0yHk~q+0aY?Jf`Tf za<#B_t%Q=9PBl`oB*}Z2(|Sf};3nT$!)yB%bWN5wdPPOr9!i$o0@TLl3#26@If&wS4rhp)&sVZSp7QI!`y`qI;aU_Im@IUN>=xP{8tuxbFYU9qhMU*-vPe& zvGi@M> z2;b5CXc#&?p`iNsF%Tlo54FfJq@W6Nz#o}7P-s$TJ(>@M23m&X?+Rq5=8WhJPlCw+ zBp_v=RUp8z2dwv%3FI{xT0*MB+(@7!>)zJqhv=;oeSOydZvtgl zWUDM^u}dR%b~Ru8NZJDZnP_K1{kGEc@lFYAxAV(cPr>&6f7nPBjs5{!Z7kSDMBe@B z+gbpF)QgR0YzBS9aDd*@F&p~@G32UZdU*p^^{(Eyfnt*@c9U(uG&H;MTXr=$||JIi~JDSgZY#GrTFM9S?X{pwu+A1|W9z*HN=#tZ(u`PG=6$ z0RE<9K@Vx<;)0&K?w0zW>$4Fc6)+so#aW%?CI1yzjMDa5Lo8g@mr*dH z>?w$+h}A51&Fon#UtM(V|EEBb0miXYUI)@2-^fq^xOG6C!0EfS^f5%zf zd#8l0=fE()c5&1oQDuYL4-i2omCO#0UCqR!^%3n{4-hsQnBGxk1=tZ9s;|1N0Je4x zR1L`G&&rPU(4wu5fp{ujJYhmP=4h;ffcw6^WBA|7?*vo%t)5kW;!G$q3dA_3c&P(t zT<|mp9HBvdb{?JewyywB;#qJ3DMBN67gH#q*ZouZq0grP=FjN{jp1x9jAa^B$>2!< zIEtJdcc~WvEpQa@0aearbhLq!)4!;IjU02lKm(!Ee4^z1>VtMrF8p^FziW^NiXdx9 zu7}524TQOf1S25rzu)3s(P>*z`PZ>uZ*@flQvrce(D>k=3fK=nq*JWU0aSysDd2Gz zH~=x}%>2S^&3M}@o#+Y(f~pZctAJ`;1;Jx~C>eU@7fviV--@lxFmU(@$&!nC5#F7U z(g0Nc0HCX=zYheA4=|_THx6hGy>a-x4p+La=Il`mcq#z60_R{JKg=Oec>z;>>ty znp%yYTUwiMeD4x!YGTLFSU$PmwQ0AH%Wu0wV;8jd?xf~mb%!#uMyjHH_3#nfZrk-1 z8hg=An;CpW4oh_$Enc&og@1G{lM(m{#YV7H)$+IEINBD2;`NJODWV_G=C(MPg*Jjs zhU~~>jhGIuNXQ$l-vlp_O|bHSFTO%HSa~3!Bb`TAyn>iU-RE}ONN2heYT_!HwGX=^ zYu5$`yJKsEA5K>AHz*i^x;dSN=ZFGu;4_1tsj#vaM;k$YUUGw^`PNT`aVjqn=lR4o z(rk_?c_Z*Rd%lhI)bMz$&48z2HAi`rwHU&^!7jNG-2KFUw2d@4PG}$Ss$4uNX9S6= z{SsTtH0~YU+BPzJ^$e=?(HRsz3lGHaJR@tdXQCb5x79dWB31_yAWBp%C&Vj=cLOOk zgYRtFKGrq@%lG>!YqYmy^Aqr@KcgDdcD)VRrZp2LdA7p)=`%*Jpr31Nti?>1L;PpN z<8t`#`$yJFJ!7$-!M{i26;r!5(LWJgTMV@AgblPU`@D7tFt#;HiB{oEhyM)zdasQq zP&}){4l7SYrP1`QjoTiPXHf6XpFst+k;+bdY;C(fj{dn8P*rnV3ND6katzCexm^MgCI5()hCc7{zBCb4}R4`%^5j$$TPSpCE(Mvts zaz-1C@gB7kuwQZ!^>dzv;3^&x&lDuo?#^#QM+WUE!|b{0EZ606$zEIEvXc=Z+7dNe70 zGMegh_z8jUP*x0f#hNYgCDF_C#KrS+wH0}oO>8InS{jlyXWmhjd$;w;Xcnd}BCzdd zoA%h}Nyat@;;@o!9Kq79^P*M04)q6s)R z(0O#*UNpO?-NgS=Ww52(=%q@A=Jo@EHV@H7!CeWrFv3@-{h2X4 zeUPts@m``6DVuV26JwaF 
zbwE2{^mYr+mizS15p36WalvntB<_{x#Y)Hds*cX8u$l{IqZn$3(--hs?RHwZiDE{6 z#Tr0F3|(67SWQRg8msU(w)c9CKXb!wywC=ihqk!7rZ1dat`z3A$Ik;6nP}yPy#2gC z{Q#2S0$2~7zD@Y~Et7A}k<$pIK_z$EUEJeXZT8FE&<)cMv>h+ndF>^4TS}%BQ^VP~ zL-v;xb9}@~*(tgTm0I|1DA&KiWyMMfZoSZ>N^UERR52Zp%}f?NzQ$)G*PILvVBhrP z9L-ftSxO(UrRw1(e<>W~r_1!9N_&FDeK~NpguG8EY!t`g{r!WW%6Nao9xXZ#XGcwd@r6C?{~f8 z`0V0z63El0Hb@mh2)bXvoWqr{1c@QRtaTRt_Ow^6=TI`kjB|~UJsN~Da495K7swY#-T98 zW|O3%G{VMv{1_h+aU*lO7|Z*JGk%1GX+5SG3x}^1he$C^7aPyi93KKprja6xEXE3M zFz0vUPt4^OxV%Hj|NJRlBo93$Ta_iz3AhRTc7q-)vs45x+90Ts9}0k_JrIyelAONF z$*jfl&~q*APvgZ28+@Re4BKF0zwk9ANtfgNp(34?=G9fW z@D-Q%=1bbuJ*XEGKk01H*-)|`$9Yq8gs;pL>Fi{&9J{10ZnB;MkD{+#9aB^J_F9IX z0hqTFhb5P92e%9kWVFo;do%IdECpFIw-FDZ=L4>2Fmk@h5v3ScCxJGUR5#J%g={Yp z+CZodl>p&l|91q4iQM!MHW!I@Dus(B!S+uj~s4tLrcHBUDmRP+5hSmkMbn-?@dj zS=)%3t64K=(nVh?jQm-WH*cbP8u>#1-pEf8&C&-f`XE$`^&A_u3>jCiQ97r!`?K)+VUXDAgNu>kcCffm5EHel4!rep~l+^kQUquZ3 zBPW`WX9ryNojKVqAc($hL9Zw@e>)C{VAw`8^1HztF(P1?kpr^JJ^n}jKRVzsEdt3b z(_EEJzZ7|@(s59=D8Vmi!wkS7K!*Rc9yoBzJiU;lT+s6LaES*BKZzQZ67XsLr`apk znv++7vs@Li@uDpL z)xf-a;P(>9V}C0`;~kx4|&0V8i1uJ&#q7?{|HHFt*G1CuNj?3c_+#STe1DT&vT3orJ27iVH7E*Q@GWX`AbDcA zC&JiY|IkKM?eQQ}d5Y(@vM4=sWy!?q*Rm%D79-e?GZ|=a^^-%WmN~9X$}PT^RAZsc zE0-Ma8}w8+&N{vm!$=o@b;_IkF|>>4@!hWWWNeiM;mE3V|G_M=3NOz+~ z^J1r>Nj-v1w%&4u2L1UBEsb~tXF&b(#Q;4hsqQE0cfIa3+#VyLhu5#lK1z0BD#Lt!6?9p%@`%X4y;Dqp({@mK8525F zFFl-bB!V9u+V~*imzPx;SIn=h93*hf?-iD%gP-R2(%Xw3PgkOUX3zB&uiC&gQRq3+ z=nYxUN0wG~^J~qh*5}N=3SYsm>*e&Le+{fU2(JYpS*BwzdJqM~B_Zn#kpslpi;BhO z*_8^9MBZ5U)WN+U533+vM7aA28V1yTtC)~fD@65WpY|gv`WP2Gbu`{j9~BA|G+gfN zjh7i2bQ_ecA7LKX75&`BSIXigTcoRPYaGDCXlx9f*?q=ydR;R7elGFp^^FAE;}?Z= zuk(8Lo7;Imr!dBzWaYJOAH3>dnXu`|^ZUZlKYp=U!GfeWK6|4?XK1EWqQiILv)(XI z&32Ku`t97xDlKv!b((OJJSDc`K@KhlADUyeI=BvR@nEUw={L_;h7WMMsk4tuHWPkn zOT;}fCeVWUhK+GwVq3_MG=1t}&s9a^@?<_rj2mdX?xfjV^yG!ghJZXgIQ69ez$Rxj zhShrFPGw?ULE;t113m?|^4Qdr<%4XwOw*_K5=Hi7wu2djw6oyt04rk=p^s?rBcI;E_{3N zHvuEg_%+r(k{Qoy!7pVzN*LtLT74y!?t<9T}e9JS+sN@E6ir!9P` zi!(4+KkW^WWnj6Fn+4wt@U~8U{kqNK)QH| 
za5;5qD?p#va-Mo^K9vQ3jyf0xUOAHcm5S50`|0Vfa2S(V%rIbVGL=QVi}Q!!cKRq7 zOJV5*0LsNvfICOEI2^!Ts#bPmY9=#Zj$)87KBSZLo{1WEsLQjN4$dFNX;XmR-*7cX zpVG95O3>D??%)jk*+~c34UH2=lePu_Pf)&(mL+Gp(*X$ATX7D_;;zQ5n1aTRLo`p# z$(sYoo&(Q3-v6Nq`1qVC3Euk?mLvJbi8smK)GCZ$D}2VsnD9&&=P>y1+{KA!SBg})F0XaW;0x#T}}e;S`Pl!$Am}9I56&7I1g|vV1td# zRGe|`)h}p;a{?a-tli=Qf&n!O^I{Z6{*9A9K7Le}@8hAwp8>R?>HufuT4zX{fgw!? z3#!n*R(lS~R0&Xvi+ln;@yN6bE#3%7iFWmAS^_f*;oA29wc5;?)d0DFTMa$>kJSJZ zc$1u(u`VQ@E^2YOz^)Ft$Y}e41(;<;5}E{Pdq!{&t69KAadPFr1tBSv#`DyU zH#aQDfW5>&wxNSgalVsN%2Lc}o;n&PEC=wSWdR7KJyqh*tmgkeSqX8{_h|9ICO4~Mblwn329(q z5&!-}AI#r2MHwhNj4H<0^SnDn{P@^uX5Q#~EjIpxokOhna=F-(Z&uRJU1^j*5$fL1 zu?aYqXHTYxdO0&DArNXWd*4*7!E>#{>&oX-`z4_Of2vdOi;L{XV>%2ce)Qf)rSOTP z!-l%J{D@NyP3pCV;YoeNdGeDFlJ}3N^Oij-6E|1xAwlKem)*dGM5nSyT4s?UZ&2$eKovVDQ>Z>+)~G?$}mm#=C;H$ zM-g#u6QzFLm|&!hmI`tW)v@fe6kbK zdri9y;%RUp9cTOE6(46KK#4l_oCoBQZG{ zlXZ$r#HU=9%PN`|+L&F;)cvbgjc9S2Z|)-oL`n&XZ=I;b-h0{BT7rSJl($k;cnt9r(VUvz*VN(hnBE0 z!v9s*SqH_@Zu=T{2o@M5FlcZB!F6B=?he6Wa2VX(Em)A?5G=R_C%C(NU|fnJ1uZWmA&2v*vQh&OX+Di#S+nS>8 zT@368T)_#ths&2HJ&re>_p4W%k(yu#f&B6)l00K%W(1)M@s0+sDDCIOo(JY_To8t| zKA~!!`E3;_ufWv$E_08|Avpakj{~z@M(WI%)Tt8M*C;pAeo?G2$fIoIh!H$LWHNp~ z)O4H`T^utjz?NMyZZKkFp^A?7OpY*b5LqAw4dw&>EE4XIogG4fze8(cCw z?_Bn&Ro5=o8K|*&JC7J#9&CepX`L~eO0hH-6uRn|;3^q>aWdvN>I8Li{ahtRwV^U8 zP|7K1{d~fB0!DJ-a|GcgRzN(;vTg8MX?lVAOPdMGpqU-Y&Y z$<4P~eylQ}r6uZS$VWSIF4W$S40l! zddNPWz$F>k^+zbA+2vy0!*0N9k@I(X{s{@3_?XC|8J(bmfwPlFq_%@MqGo8Ar(J|? 
zm5o6@WgGN0*a=VXH-EpN$qFCM0Yo8m=%CTqMg`BXy1(M$QHSOkdVQF0-FjIPWZoYL zYJTQxjLKz5K}wfI^Fi!Mm1a!%_Jl2u9NBZy*zsBe5>l1e6|&p;sS7JFjDOYQHdbCD z42#V@rk*WY)3Q@rbw4X~)mUMjBngk+@%J$*WYbPjOuU8`HB)ts?S*f-j3Mm8R<5s8 z8H{o=8dF}`W)iK#TA`|`Co01dLq6{hbH+}&EGzTKD`~1IDgFM~X9}n-a33|8M3M)XInIz{w}$#F(2Z9SR-h(L8z zi7vDkwssn<89kCtpk_ZXNq(-;+TE6<_w#PgaV3eT26ToFavtGCos~;33h~kr#dwGrro) z&4;%5Da;Q7q>$1ghz6@iYADRtTwVy0;y<&_vFst#`DBhiqBMSnhrAA2DA@O)UTYgH zNKGxtCmh`hIAz~FUJV*?jOI;1+{yGy`-440L(VKozr*Z; zRNLDUy_??+*Fci6kQhGp0UDTpgYWr(|L% zS7-}&;ozS^=!h?(LGiIA> z2NW+{M8DpnV;(X%X^v9K3CsD+hW{pwLBTNOv%d)A;S~}rxyUz9G$(lzkn{X38Tj=b z;zg+dt4jz(=aw>9UcN9QQi%moiw!arR8vTL@}z@7{~&nik!<4Gw5ahV)LW0zY1vrU zzy*xDYnWVAY()Iv-Dj#$_+@L@!#g=*?ec90R!Q&Gu+@R0T#n>Am$+TEeVL6$62DF+ z6=nMZsc(CbPLc$lKuVeP{6avEU`LBUvRDRgn}pa6tMwKyC+>!dBLkbI2!j`f*WuQ+ zValW9&Da}`u=8x@cOc0q-_{vbJV9@4BSI8|*OD z^4nikEZfnqI>e*EF8n=$^OuVzymG32hl0i*ECyxpjmj!!Hy;SCScdsZfQejFIXVk|wCjBK~E`uS5 z9eKefo=A@pFJv%GnaURDph_wjd4&!nesd5Y5I*6iOUL2*i<4PhYq&?wdGK_DoYL`< z!l?x!gJ5F6u`inK381%%B>k+4HW2mnq!_nmnj6Vc$MWjiuD}3D-ePtR3RGu0=snd1^*0(X z9vKjx#ZLP$UN6Q#wG`&jTRg|nRTf0vnqySeAy}tQ9YIDzwJ2ADxxjovJQH-rrIM0! 
zBvrInS#J8Z;|##p-bL>*QL# zM*lWUSjgBWW1K2!vorRid^yZhX$iEOybZ*N!hGj+?@hUUoZiVw$!PydAm3K;IE!5Q zU=RbgA$ftTQ|(kXW5T$$?VIg$ls+@#5Vb)~TGHs0{L#&kO(v15bOPqt13~iAx1Z!n zOix)XK)2FPC+-}pzLy-Y({|dk`4%oKDvoDpwc!oaD77!!u4n1^J)SmBy+T96lv- z$mO3Ebxf2~I;=uBM7c8H`L|E{m(w?V2Q6WoGF9iF+QR4rIMgT0_Lp0~N8DGP^Zvkj zB2RDg1ePA!sP9DhQ4fo2CQST(gnYInm9LVJ{5?H8+X1A_-JRDre}STIwHxkMK?c6J zTU&Z%A{$`1=i~$n+tQWU{;BQ$;bU@Ooatm!+5DCdyWyRX+DTl^o=8zlJW2tN!Tv-} z*a;dmJbReK|K-Nk!aZb^WW>B=!@4s?fZa)QzjBpz+;Ob#(8*B5+ZS%m%QejxYg3#g zF)pide*%cro<-;umPy-{mDw8nGvBNRc0KCrb`-o61lxj{7m=R;cq?0t^==*nIVaHR z?uJv9n-;{-m6KRg$jRR4m;vGOhy07J@ZhChpo7P0czWC=RVgD6D>75`cc#RX_XPI2 zpgsJ6W{1FB3a@Jy&d#KbP$D(Pk({U^WBMSA`nU%^qeFJbN`vfr2SKI|@3KYH>tDnY z$j=>M?{XY<7hx=++t^`bq5R_o8)Kz4u9%ap4tzZe>My{c$|W0V0U;q=uvpv0mM_pk z{Y>M1fo#0VXiYxONjQG$dw)q=6<+NfE+rxugpB|y-Mhz6UEmY)dG>58s>tvM?>^l8 z@++`4UEK}<&9`(2@Xx*iz-N^QlHJc$7nCB2^rAJazjz8FsAH5668pC5GKaE^RAP(Z zOAYWn??UFFqa`phQVIyu`|nwo(l!p0xzJ2RVC>TRyX_CaS|Q6yuOY;D)9op}kS4eE zuO|AL-*Bnvmjo1(;x{E@lZOkvpJpOcUD3+egxI=ws-z_!{StPfLJyxWI;va-LjQPG#wP%Tf)?PwilPs3*pjWEb zGUNHzx)@xc9&@OeUPvHX^``6D-2G;RSrJ`T1pI+7))ZTmIKH|DQm~C-8`ia%KM=byO$I>M> zv-Ru3c1GJON=(O=*3+zMcs@fWu$u&dq zDi=*;XKv=f&H*6|6v;uANW6qlocmLnAI}}=dBGb{jeU&oWxiEBcE+1J@e>BWB~oIg)o3xq6(jAXI73lfr2rPRvINP6X7YDQP`c^)yrfQAi{ka*gaY zf61jT<$X~PN7Pk^jHLC4z@}Ve2@;w-jPpN5PQgj>Ot6DDAIU#DcYnL9i!IxFnw{Im z3TW?N>pwgvl6d`fh=cZ10>a(u&x3{unpm_U703*dfYq-Q$IbXL+C7gDzs`W3rje1v zY^OZJPN@Qwp_oR~sx%h_DMK9zafw!?&^7ok0b*$mgm?2|NJtj=LW?J4vv0LLYdxC_`*BN9v;F@Gk2J_7jX*X9VjKYC$R-j;~_=|^1D z2|u_5^*6PLUtt`7CI(X8#br?sU`V9;Q=c9V0#@`V?LL$lDR`(d?d9LJBm|rm9L?%P zYu2WO#CxGO4nY6zee@xHm(yqJjJ(JutAJ|DsN zT__?XPPEE%y1V4=+tQEfR+UxVwc2GDKYM7^h~sbpmR1m*D7yh-VmQ4R)f>=eqV-d) zW^~HIy2}syp?X^!p?>>;9SB4x&ZVA{(>xCzJu(;2_qPsEa&t1SMbme`%*6yM-M+co z4LB#UoZCBypJ4+E%6fre(d`hWsy2!~@*WmlbV_5sH)_D=sndZcA(ib>B+*@mF0}zv zN>S=}sG(wSMc(kS)2;Bx_m-&*Gtp2R)R2pWkEVV3^T$5diVcgN7d!k!dN#(K0~sSF z-4H~Hqf5t4qw`3}t0`o(XAg`|2b?u74AGc*554}@+xS?2{43b_N#Iq@Yw}*NcFSrl 
z0&2Phs^E>xPeZp4WWL#tpLWTTF&*tm5^oYzT0{)Hsp^d_yU=Rd%!&=O@eUB%eRNId zTR83Tu%jm-1k5i+jwZp7vdl~BYoSR zRO0+h{W*(iKR*U!!9CvnTs$vjDk?paGX3m+Kp`;#D|b$hJwr~s(s(0b?OZ5FhvVH3 zvOwFp*H$Q#Adw$0;=0&WDZCL@z9GlP<7(r^Jk1^yMh=v$zDiYse{I33E7IsMfB1|D#Y~{j1uCetMXV$~=dVMnP_&Dk{RIxRcHUn_; zcsF|jdX*AE4o?Cq8%A{UMdXk=2pIkpK1JU)xHwe%a4q#g*~`)Sb!O3T;r{OP6D}bK zJku&d#x-Zz=2TMN%Aosd%09QJ{k$-1daF>UG=}GYlZO1ua^h+hg|G6*j-iD5Kbei2 zi;dHNXEs{%qAo~WIH)kOx2-6=;SfFj10H5Vta6Ltsax|gv<(WT8)={R-pucFN9WhO zXC99NIVzj!Ynzw;@Li9`%q}D2`Tf>A!Ct>YWl9R}8B%&iByJ-hoO?#QcnLRa`h$1l z2FaQ2_c!oQf^X)yh!Ud(gA-N;n^UacC)ZacpB2ke5+4v_N8yT_!;!g|Uz(|>nBkgB zbCv2Ub3f^4B};QBe-v1f3oZ|*FOr`v&b7t>3v7ejq!7LWQPc=rLmXY3;a;vpU~UyP zSE2?>=9TiYHU{S6N$DKv9OWEzcaMFXPm0Ab3tG(zO^tHiaV!}gg&#DTDNW=*Lf4j_|xD4UXXZYXauWRPt zx&I`2`4@nQ0Q_^W`*Z!f^5rl2pY1Atw*PNMmp_I75Bb-%@|XY59+kiR@E`uKRpp=D zKQ$kJInzHzmj74!@lW=jiQ-?DTj77$|L)a)V*hOFzu1li0>b}oaa9FWw0|w3|2b3s Lyn}(|zn=aJ&;y~I literal 0 HcmV?d00001 From 97785a3658b066771d439d4b46cdea5951cd7254 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 1 Dec 2023 11:26:54 +0000 Subject: [PATCH 02/30] initial structure --- .gitignore | 5 +- gtfs_skims/connectors.py | 0 gtfs_skims/graph.py | 0 gtfs_skims/preprocessing.py | 0 gtfs_skims/utils.py | 184 +++++++++++++++++++++++++++++++ gtfs_skims/variables.py | 18 +++ requirements/base.txt | 5 + tests/conftest.py | 15 +++ tests/test_data/centroids.csv | 19 ++++ tests/test_data/config_demo.yaml | 16 +-- tests/test_utils.py | 33 ++++++ 11 files changed, 282 insertions(+), 13 deletions(-) create mode 100644 gtfs_skims/connectors.py create mode 100644 gtfs_skims/graph.py create mode 100644 gtfs_skims/preprocessing.py create mode 100644 gtfs_skims/utils.py create mode 100644 gtfs_skims/variables.py create mode 100644 tests/test_data/centroids.csv create mode 100644 tests/test_utils.py diff --git a/.gitignore b/.gitignore index 06fd923..fb87f1a 100755 --- a/.gitignore +++ b/.gitignore @@ -37,4 +37,7 @@ reports/ mike-*.yml # Jupyter notebooks 
-.ipynb_checkpoints \ No newline at end of file +.ipynb_checkpoints + +sandbox.py +tests/test_data/outputs/ \ No newline at end of file diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py new file mode 100644 index 0000000..e69de29 diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py new file mode 100644 index 0000000..e69de29 diff --git a/gtfs_skims/preprocessing.py b/gtfs_skims/preprocessing.py new file mode 100644 index 0000000..e69de29 diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py new file mode 100644 index 0000000..a5ab706 --- /dev/null +++ b/gtfs_skims/utils.py @@ -0,0 +1,184 @@ +from __future__ import annotations +from dataclasses import dataclass +import logging +import os +from pathlib import Path +from typing import Optional +import yaml +from zipfile import ZipFile + +import pandas as pd + + +def ts_to_sec(x: str) -> int: + """Convert a hh:mm:ss timestamp to seconds from midnight. + + Args: + x (str): Timestamp + + Returns: + int: Seconds from midnight + """ + s = [int(i) for i in x.split(':')] + return 3600*s[0]+60*s[1]+s[2] + + +def get_logger(path_output: Optional[str] = None) -> logging.Logger: + """Get the library logger. + + Args: + path_output (Optional[str], optional): Path to save the logs. Defaults to None. + + Returns: + logging.Logger: Logger. 
+ """ + logger = logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + handler = logging.StreamHandler() + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + handler.setFormatter(formatter) + if len(logger.handlers) == 0: + logger.addHandler(handler) + else: + logger.handlers[0] = handler + + if path_output is not None: + parent_dir = Path(path_output).parent.absolute() + if not os.path.exists(parent_dir): + os.makedirs(parent_dir) + + file_handler = logging.FileHandler(path_output, mode='w') + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + return logger + + +@dataclass +class Config: + """Config file + + Example config file: + + ``` + paths: + path_gtfs: ./iow-bus-gtfs.zip + path_outputs: /mnt/efs/otp/gtfs_transfers/skims_iow + path_origins: ./centroids.csv # path to the origin points + path_destinations: ./centroids.csv # path to the destination points + + settings: + calendar_date : 20190515 # yyyymmdd | Date for filtering the GTFS file. + start_s : 32400 # sec | Start time of the journey. + end_s : 41400 # sec | Max end time of a journey. + walk_distance_threshold : 2000 # m | Max walk distance in a leg + walk_speed : 4.5 # kph | Walking speed + crows_fly_factor : 1.3 # Conversion factor from euclidean to routed distances + max_transfer_time : 1800 # Max combined time of walking and waiting (sec) + k : 500 # max nearest neighbours when calculating distances + max_wait : 1800 # sec | Max wait time at a stop + bounding_box : null + + steps: + - preprocessing + - connectors + - graph + ``` + + """ + path_gtfs: str + path_outputs: str + path_origins: str + path_destinations: str + calendar_date: int + crows_fly_factor: float + max_transfer_time: int + k: int + end_s: int + bounding_box: dict + max_wait: int + start_s: int + walk_distance_threshold: int + walk_speed: float + steps: list + + @classmethod + def from_yaml(cls, path: str) -> Config: + """Construct class from a config yaml file. 
+ + Args: + path (str): Path to the yaml config. + + Returns: + Config: Config object + """ + with open(path, 'r') as f: + config = yaml.safe_load(f) + config_flat = { + **config['paths'], + **config['settings'], + 'steps': config['steps'] + } + return cls(**config_flat) + + def __repr__(self) -> str: + s = 'Config file\n' + s += '-'*50 + '\n' + s += yaml.dump(self.__dict__) + return s + + +@dataclass +class GTFSData: + calendar: pd.DataFrame + routes: pd.DataFrame + stops: pd.DataFrame + stop_times: pd.DataFrame + trips: pd.DataFrame + + @classmethod + def from_gtfs(cls, path_gtfs: str) -> GTFSData: + """Load GTFS tables from a standard zipped GTFS file. + + Args: + path_gtfs (str): Path to a zipped GTFS dataset. + + Returns: + GTFSData: GTFS data object. + """ + data = {} + with ZipFile(path_gtfs, 'r') as zf: + for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + with zf.open(f'{name}.txt') as f: + data[name] = pd.read_csv(f, low_memory=False) + return cls(**data) + + @classmethod + def from_parquet(cls, path: str) -> GTFSData: + """Construct class from pre-processed GTFS tables in Parquet format. + + Args: + path (str): Path to tables. + + Returns: + GTFSData: GTFS data object. + """ + data = {} + for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + data[name] = pd.read_parquet( + os.path.join(path, f'{name}.parquet.gzip')) + return cls(**data) + + def save(self, path_outputs: str) -> None: + """Export all tables in zipped parquet format. + + Args: + path_outputs (str): Directory to save outputs. 
+ """ + if not os.path.exists(path_outputs): + os.makedirs(path_outputs) + + for k, v in self.__dict__.items(): + v.to_parquet(os.path.join( + path_outputs, f'{k}.parquet.gzip'), compression='gzip') diff --git a/gtfs_skims/variables.py b/gtfs_skims/variables.py new file mode 100644 index 0000000..f1e9591 --- /dev/null +++ b/gtfs_skims/variables.py @@ -0,0 +1,18 @@ +# route types lookup +# source: https://developers.google.com/transit/gtfs/reference#routestxt +# and https://developers.google.com/transit/gtfs/reference/extended-route-types +ROUTE_TYPES = { + 0: 'tram', # Tram, Streetcar, Light rail. + 1: 'underground', # Subway, Metro. + 2: 'rail', # Rail. Used for intercity or long-distance travel. + 3: 'bus', # Bus. Used for short- and long-distance bus routes. + 4: 'ferry', # Ferry. Used for short- and long-distance boat service. + 5: 'cable', + 6: 'cable aerial', + 7: 'furnicular', # Funicular. Any rail system designed for steep inclines. + 11: 'trolley', # Trolleybus. + 12: 'monorail', # Monorail. 
+ 200: 'coach', # Coach Service + 401: 'undergound', # Metro Service + 402: 'underground', # Underground Service +} \ No newline at end of file diff --git a/requirements/base.txt b/requirements/base.txt index 9c9350d..b1ccdb9 100755 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,3 +1,8 @@ # this dependency exists so that the base file is not empty # it was chosen since it is a dependency that is included in any python environment already +fastparquet +graph-tool +numpy +pandas +pyproj zipp \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index a6de8b3..8f6127a 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,9 +7,15 @@ def test_content(response): assert response.content ``` """ +import os +from pathlib import Path import pytest +from gtfs_skims.utils import Config, GTFSData + +TEST_DATA_DIR = os.path.join(Path(__file__).parent, 'test_data') + @pytest.fixture def response(): @@ -19,3 +25,12 @@ def response(): """ # import requests # return requests.get('https://github.com/arup-group/cookiecutter-pypackage') + + +@pytest.fixture +def config(): + return Config.from_yaml(os.path.join(TEST_DATA_DIR, 'config_demo.yaml')) + +@pytest.fixture +def gtfs_data(): + return GTFSData.from_gtfs(os.path.join(TEST_DATA_DIR, 'iow-bus-gtfs.zip')) diff --git a/tests/test_data/centroids.csv b/tests/test_data/centroids.csv new file mode 100644 index 0000000..5fbb1e0 --- /dev/null +++ b/tests/test_data/centroids.csv @@ -0,0 +1,19 @@ +name,longitude,latitude +E02003587,-1.155884355526687,50.72185610082279 +E02003586,-1.173878456845198,50.72301086140903 +E02003585,-1.224069337360162,50.72613466907799 +E02003584,-1.158508858308219,50.72873880022016 +E02003583,-1.28149160450734,50.75184512743099 +E02003582,-1.31006711916123,50.75340385923516 +E02003581,-1.297514083246226,50.75714214474556 +E02003589,-1.302344772901295,50.70267302231341 +E02003588,-1.284284792950486,50.703256856445805 +E02003597,-1.283759136860344,50.61568691131167 
+E02003596,-1.175831901564544,50.63446808149097 +E02003595,-1.180359149209377,50.64417605386147 +E02003594,-1.154986228595743,50.659399194734654 +E02003593,-1.386537290710913,50.68165562633486 +E02003592,-1.526357692381475,50.68396619141156 +E02003591,-1.30109588996732,50.69411141243501 +E02003590,-1.096598389236477,50.69424449742397 +E02003598,-1.210809598549173,50.59781051582961 diff --git a/tests/test_data/config_demo.yaml b/tests/test_data/config_demo.yaml index 66db500..85e810a 100644 --- a/tests/test_data/config_demo.yaml +++ b/tests/test_data/config_demo.yaml @@ -1,8 +1,8 @@ paths: - path_gtfs: /mnt/efs/otp/gtfs_transfers/iow-bus-gtfs.zip + path_gtfs: ./iow-bus-gtfs.zip path_outputs: /mnt/efs/otp/gtfs_transfers/skims_iow - path_zone_centroids: /mnt/efs/zones/msoa_centroids.geojson # path to zone centroids to skim - + path_origins: ./centroids.csv # path to the origin points + path_destinations: ./centroids.csv # path to the destination points settings: calendar_date : 20190515 # yyyymmdd | Date for filtering the GTFS file. 
@@ -19,12 +19,4 @@ settings: steps: - preprocessing - connectors - - graph - -# Settings for AWS batch (Optional) -aws_job_definition: - image: "815306348607.dkr.ecr.eu-west-1.amazonaws.com/ukpop:latest" - memory: 32000 - vcpus: 4 - job_definition_name: "gtfs-ukpop11" - job_queue: "general_purpose_queue" \ No newline at end of file + - graph \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..88489f5 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,33 @@ +import os + +import pandas as pd + +from gtfs_skims import utils + + +def test_parse_timestamp(): + assert utils.ts_to_sec('00:00:00') == 0 + assert utils.ts_to_sec('10:01:01') == 36061 + + +def test_get_logger(tmpdir): + logger = utils.get_logger(os.path.join(tmpdir, 'logs', 'log.log')) + logger.info('test') + + +def test_load_config(config): + 'path_gtfs' in config.__dict__ + + +def test_load_gtfs(gtfs_data): + for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + assert isinstance(getattr(gtfs_data, x), pd.DataFrame) + + +def test_cache_gtfs(gtfs_data, tmpdir): + gtfs_data.save(tmpdir) + gtfs_cached = utils.GTFSData.from_parquet(tmpdir) + for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + pd.testing.assert_frame_equal( + getattr(gtfs_data, x), getattr(gtfs_cached, x) + ) From ea8c3da7a020b3313a9ad753bbce88f3129dd5c0 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 1 Dec 2023 17:31:30 +0000 Subject: [PATCH 03/30] filtering --- README.md | 2 +- gtfs_skims/preprocessing.py | 42 ++++++++++++++++++++++++++++++++++++ gtfs_skims/utils.py | 15 +++++++++++++ resources/logos/title.png | Bin 0 -> 750 bytes tests/test_preprocessing.py | 9 ++++++++ tests/test_utils.py | 4 ++++ 6 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 resources/logos/title.png create mode 100644 tests/test_preprocessing.py diff --git a/README.md b/README.md index db9a868..d6ddf93 100755 --- a/README.md +++ b/README.md 
@@ -2,7 +2,7 @@ ![gtfs_skims](resources/logos/title.png) -# gtfs-skims (gtfs_skims) +# Argo (gtfs_skims) [![Daily CI Build](https://github.com/arup-group/gtfs_skims/actions/workflows/daily-scheduled-ci.yml/badge.svg)](https://github.com/arup-group/gtfs_skims/actions/workflows/daily-scheduled-ci.yml) [![Documentation](https://github.com/arup-group/gtfs_skims/actions/workflows/pages/pages-build-deployment/badge.svg?branch=gh-pages)](https://arup-group.github.io/gtfs_skims) diff --git a/gtfs_skims/preprocessing.py b/gtfs_skims/preprocessing.py index e69de29..9b4d8b9 100644 --- a/gtfs_skims/preprocessing.py +++ b/gtfs_skims/preprocessing.py @@ -0,0 +1,42 @@ + + +from gtfs_skims.utils import GTFSData, get_weekday + + +def filter_day(data: GTFSData, date: int) -> None: + """Filter the GTFS for a specific date in the calendar. + + Args: + data (Data): GTFS data object + date (int): Date as yyyymmdd + """ + weekday = get_weekday(date) + data.calendar = data.calendar[ + (data.calendar['start_date'] <= date) & + (data.calendar['end_date'] >= date) & + (data.calendar[weekday] == 1) + ] + + data.trips = data.trips[ + data.trips['service_id'].isin( + set(data.calendar['service_id']) + ) + ] + + data.routes = data.routes[ + data.routes['route_id'].isin( + set(data.trips['route_id']) + ) + ] + + data.stop_times = data.stop_times[ + data.stop_times['trip_id'].isin( + set(data.trips['trip_id']) + ) + ] + + data.stops = data.stops[ + data.stops['stop_id'].isin( + set(data.stop_times['stop_id']) + ) + ] diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py index a5ab706..d646cd5 100644 --- a/gtfs_skims/utils.py +++ b/gtfs_skims/utils.py @@ -1,5 +1,6 @@ from __future__ import annotations from dataclasses import dataclass +from datetime import datetime import logging import os from pathlib import Path @@ -23,6 +24,20 @@ def ts_to_sec(x: str) -> int: return 3600*s[0]+60*s[1]+s[2] +def get_weekday(date: int) -> str: + """Get the weekday of a date + + Args: + date (int): Date as yyyymmdd 
+ + Returns: + str: Day name + """ + weekday = datetime.strptime(str(date), '%Y%m%d') + weekday = datetime.strftime(weekday, '%A').lower() + return weekday + + def get_logger(path_output: Optional[str] = None) -> logging.Logger: """Get the library logger. diff --git a/resources/logos/title.png b/resources/logos/title.png new file mode 100644 index 0000000000000000000000000000000000000000..24ad2cf1780872fcb2d1f0568fc4d1f5b29bf3bd GIT binary patch literal 750 zcmeAS@N?(olHy`uVBq!ia0vp^2_VeD3?#3*wSy!Wi-X*q7}lMWc?smOq&xaLGB9lH z=l+w(3gjCH_=LCuX;Q&rR=1NtJpv^`e!&a^3I+}V2?Y)P^Vjb`fB(I*R8tpFjUkfjqFSsr|NQgMKWp~Y@A|NSS;uBwNz++#s{`+hW$WT29yuM~_Ca>L-;Crp zEx*O?bFP$MJK@(x;h_D(LH7Fu4;k0{aWBnpy=3n6a=+M$r7a&8q-zAkb6)BH=*R!< zn4w^leCsv#gw>{!owd41DM@x%rFO<%%)8FQ|^ zKkw-60C|>w5sU0zszmFXTb;DGxyIhOttE5Qp{ki`g0K3%t)1{Y>_n}$%=_$x-?j$) zj*@t}_&}}5j{ZXt@hmU96>0_De(NO6XMMBT;NJSCcj1oTR~Ias&wBfO)7$iiG4@w? z)l~(S{8gFr<~Pv0d-itUytn(ouFp!G*7`!b-Ty3^oO5qp$)worTZIobeLwy8srV+* zXC>Ji4w>3+cG@>Z|`Ra*k>HrE%fY+9jNOzPM#Fy}$aN{~X8HYsoF2ju{=gxKHp$*ix1%wFKmFB!u tPkj5!^TjVui|gNi*4P1~{I~P$W%9hCzL#0wR+NAerl+f)%Q~loCIFwcPF?^2 literal 0 HcmV?d00001 diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py new file mode 100644 index 0000000..f1de471 --- /dev/null +++ b/tests/test_preprocessing.py @@ -0,0 +1,9 @@ +from gtfs_skims import preprocessing + + +def test_filter_date(gtfs_data): + a = 1 + assert 14 in gtfs_data.calendar.service_id.values + preprocessing.filter_day(gtfs_data, 20180507) + assert list(gtfs_data.calendar.service_id) == [14] + assert set(gtfs_data.trips['service_id']) == set([14]) diff --git a/tests/test_utils.py b/tests/test_utils.py index 88489f5..5585b62 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -15,6 +15,10 @@ def test_get_logger(tmpdir): logger.info('test') +def test_weekday(): + assert utils.get_weekday(20231201) == 'friday' + + def test_load_config(config): 'path_gtfs' in config.__dict__ From 42c9f89661386074c92135d394d2d8ddd63b0f43 Mon Sep 17 00:00:00 
2001 From: "Theodore.Chatziioannou" Date: Mon, 4 Dec 2023 17:54:29 +0000 Subject: [PATCH 04/30] complete preprocessing --- gtfs_skims/preprocessing.py | 126 +++++++++++++++++++++++- test_data/outputs/log_preprocessing.log | 1 + tests/test_data/config_demo.yaml | 8 +- tests/test_preprocessing.py | 43 ++++++++ 4 files changed, 173 insertions(+), 5 deletions(-) create mode 100644 test_data/outputs/log_preprocessing.log diff --git a/gtfs_skims/preprocessing.py b/gtfs_skims/preprocessing.py index 9b4d8b9..aa66e97 100644 --- a/gtfs_skims/preprocessing.py +++ b/gtfs_skims/preprocessing.py @@ -1,6 +1,9 @@ +import os +import pyproj -from gtfs_skims.utils import GTFSData, get_weekday +from gtfs_skims.utils import ( + GTFSData, Config, get_weekday, ts_to_sec, get_logger) def filter_day(data: GTFSData, date: int) -> None: @@ -40,3 +43,124 @@ def filter_day(data: GTFSData, date: int) -> None: set(data.stop_times['stop_id']) ) ] + + +def filter_time(data: GTFSData, start_time: int, end_time: int) -> None: + """Filter the GTFS for a specified time window. 
+ + Args: + data (Data): GTFS data object + start_time (int): Start of the time window (seconds from midnight) + end_time (int): End of the time window (seconds from midnight) + """ + # filter stop times + data.stop_times['departure_s'] = data.stop_times['departure_time'].apply( + ts_to_sec) + data.stop_times['arrival_s'] = data.stop_times['arrival_time'].apply( + ts_to_sec) + data.stop_times = data.stop_times[ + (data.stop_times['arrival_s'] >= start_time) & + (data.stop_times['departure_s'] <= end_time) + ] + + # filter stops + data.stops = data.stops[data.stops['stop_id'].isin( + set(data.stop_times['stop_id']) + )] + + # filter trips + data.trips = data.trips[data.trips['trip_id'].isin( + set(data.stop_times['trip_id']) + )] + + # filter routes + data.routes = data.routes[data.routes['route_id'].isin( + set(data.trips['route_id']) + )] + + +def add_coordinates(data: GTFSData) -> None: + """Add BNG coordinates to the stop and stoptime tables. + + Args: + data (Data): Data object. + """ + transformer = pyproj.Transformer.from_crs( + pyproj.transformer.CRS('epsg:4326'), + pyproj.transformer.CRS('epsg:27700'), always_xy=True) + + data.stops['x'], data.stops['y'] = transformer.transform( + data.stops['stop_lon'], data.stops['stop_lat'] + ) + + data.stops['x'] = data.stops['x'].round().map(int) + data.stops['y'] = data.stops['y'].round().map(int) + + data.stop_times['x'] = data.stop_times['stop_id'].map( + data.stops.set_index('stop_id')['x'] + ) + data.stop_times['y'] = data.stop_times['stop_id'].map( + data.stops.set_index('stop_id')['y'] + ) + + +def filter_bounding_box(data: GTFSData, xmin: int, xmax: int, ymin: int, ymax: int) -> None: + """Filter a GTFS with a bounding box. Coordinates are using the BNG projection. + + Args: + data (Data): Data object. + xmin (int): Min Easting. + xmax (int): Max Easting. + ymin (int): Min Northing. 
+ ymax (int): Max Northing + """ + data.stops = data.stops[ + (data.stops['x'] >= xmin) & + (data.stops['x'] <= xmax) & + (data.stops['y'] >= ymin) & + (data.stops['y'] <= ymax) + ] + + # filter stop times + data.stop_times = data.stop_times[ + data.stop_times['stop_id'].isin( + set(list(data.stops['stop_id'])) + ) + ] + + # filter trips + data.trips = data.trips[data.trips['trip_id'].isin( + set(data.stop_times['trip_id']) + )] + + # filter routes + data.routes = data.routes[data.routes['route_id'].isin( + set(data.trips['route_id']) + )] + + +def main(path_config: str) -> None: + """Run the preprocessing pipeline + + Args: + path_config (str): Path to the config file. + """ + config = Config.from_yaml(path_config) + logger = get_logger(os.path.join( + config.path_outputs, 'log_preprocessing.log')) + logger.info('Reading files...') + data = GTFSData.from_gtfs(path_gtfs=config.path_gtfs) + + logger.info('Time filtering..') + filter_day(data, config.calendar_date) + filter_time(data, config.start_s, config.end_s) + add_coordinates(data) + + if config.bounding_box is not None: + logger.info('Cropping to bounding box..') + filter_bounding_box(data, **config.bounding_box) + + logger.info(f'Saving outputs at {config.path_outputs}') + data.save(config.path_outputs) + + logger.info(f'Preprocessing complete.') diff --git a/test_data/outputs/log_preprocessing.log b/test_data/outputs/log_preprocessing.log new file mode 100644 index 0000000..cade8fa --- /dev/null +++ b/test_data/outputs/log_preprocessing.log @@ -0,0 +1 @@ +2023-12-04 17:44:51,834 - gtfs_skims.utils - INFO - Reading files... 
diff --git a/tests/test_data/config_demo.yaml b/tests/test_data/config_demo.yaml index 85e810a..8bc2241 100644 --- a/tests/test_data/config_demo.yaml +++ b/tests/test_data/config_demo.yaml @@ -1,8 +1,8 @@ paths: - path_gtfs: ./iow-bus-gtfs.zip - path_outputs: /mnt/efs/otp/gtfs_transfers/skims_iow - path_origins: ./centroids.csv # path to the origin points - path_destinations: ./centroids.csv # path to the destination points + path_gtfs: ./tests/test_data/iow-bus-gtfs.zip + path_outputs: ./tests/test_data/outputs + path_origins: ./tests/test_data/centroids.csv # path to the origin points + path_destinations: ./tests/test_data/centroids.csv # path to the destination points settings: calendar_date : 20190515 # yyyymmdd | Date for filtering the GTFS file. diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py index f1de471..e5d34db 100644 --- a/tests/test_preprocessing.py +++ b/tests/test_preprocessing.py @@ -1,3 +1,6 @@ +import os +from pathlib import Path + from gtfs_skims import preprocessing @@ -7,3 +10,43 @@ def test_filter_date(gtfs_data): preprocessing.filter_day(gtfs_data, 20180507) assert list(gtfs_data.calendar.service_id) == [14] assert set(gtfs_data.trips['service_id']) == set([14]) + + +def test_filter_time(gtfs_data): + start_time = 9*3600 + end_time = 10*3600 + preprocessing.filter_time(gtfs_data, start_time, end_time) + assert gtfs_data.stop_times['arrival_s'].min() >= start_time + assert gtfs_data.stop_times['departure_s'].max() <= end_time + + +def test_projected_coords_within_bounds(gtfs_data): + preprocessing.add_coordinates(gtfs_data) + # check that the BNG coordinates fall within an Isle-of-Wight bounding box + xmin, ymin = 423104, 69171 + xmax, ymax = 471370, 101154 + + assert gtfs_data.stops['x'].min() > xmin + assert gtfs_data.stops['x'].max() < xmax + assert gtfs_data.stops['y'].min() > ymin + assert gtfs_data.stops['y'].max() < ymax + + +def test_within_bounding_box(gtfs_data): + preprocessing.add_coordinates(gtfs_data) + + # 
filter for Cowes + xmin, ymin = 447477, 92592 + xmax, ymax = 451870, 96909 + assert gtfs_data.stops['x'].min() < xmin + preprocessing.filter_bounding_box( + gtfs_data, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax) + + assert gtfs_data.stops['x'].min() > xmin + assert gtfs_data.stops['x'].max() < xmax + assert gtfs_data.stops['y'].min() > ymin + assert gtfs_data.stops['y'].max() < ymax + +def test_run_preprocessing(): + path_config = os.path.join(Path(__file__).parent, 'test_data', 'config_demo.yaml') + preprocessing.main(path_config) From 6219edde8e7dd7e5ed0f0213d3fa43c40f121dc2 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Mon, 4 Dec 2023 17:55:30 +0000 Subject: [PATCH 05/30] remove folder --- test_data/outputs/log_preprocessing.log | 1 - 1 file changed, 1 deletion(-) delete mode 100644 test_data/outputs/log_preprocessing.log diff --git a/test_data/outputs/log_preprocessing.log b/test_data/outputs/log_preprocessing.log deleted file mode 100644 index cade8fa..0000000 --- a/test_data/outputs/log_preprocessing.log +++ /dev/null @@ -1 +0,0 @@ -2023-12-04 17:44:51,834 - gtfs_skims.utils - INFO - Reading files... 
From 86a2045bcee2af7dc13af5165409fb278647e582 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Thu, 7 Dec 2023 18:35:20 +0000 Subject: [PATCH 06/30] connectors --- gtfs_skims/connectors.py | 54 +++++++++++++++++++++++++++++++ gtfs_skims/preprocessing.py | 20 ++++++++---- gtfs_skims/utils.py | 3 ++ tests/conftest.py | 2 +- tests/test_connectors.py | 55 ++++++++++++++++++++++++++++++++ tests/test_data/config_demo.yaml | 1 + tests/test_preprocessing.py | 13 ++++++-- 7 files changed, 137 insertions(+), 11 deletions(-) create mode 100644 tests/test_connectors.py diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index e69de29..c52dda3 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -0,0 +1,54 @@ +import os + +import numpy as np +from scipy.spatial import KDTree + +from gtfs_skims.utils import Config, GTFSData, get_logger + + +def query_pairs(coords: np.array, maxdist: float) -> np.array: + ids = coords[:, 2].argsort() + + dtree = KDTree(coords[ids]) + connectors = dtree.query_pairs(r=maxdist, output_type='ndarray', p=2) + + return ids[connectors] + + +def query_pairs_filter(coords: np.array, maxdist: float) -> np.array: + ods = query_pairs(coords, maxdist) + coords_o = coords[ods[:, 0]] + coords_d = coords[ods[:, 1]] + + dcoords = coords_d - coords_o + walk = (dcoords[:, :2]**2).sum(1)**0.5 # euclidean distance on xy + wait = dcoords[:, 2] - walk + + cond = (wait > 0) & ((walk+wait) <= maxdist) + + return ods[cond] + + +def get_access_connectors(data: GTFSData, config: Config): + # ... query ball tree + pass + + +def get_egress_connectors(data: GTFSData, config: Config): + # ... 
query ball tree + pass + + +def main(data: GTFSData, config: Config): + logger = get_logger(os.path.join( + config.path_outputs, 'log_connectors.log')) + + # get feasible connections + logger.info('Getting transfer connectors...') + transfer_connectors = get_transfer_connectors(data, config) + logger.info('Getting access connectors...') + access_connectors = get_access_connectors(data, config) + logger.info('Getting egress connectors...') + egress_connectors = get_egress_connectors(data, config) + + # save diff --git a/gtfs_skims/preprocessing.py b/gtfs_skims/preprocessing.py index aa66e97..f637d06 100644 --- a/gtfs_skims/preprocessing.py +++ b/gtfs_skims/preprocessing.py @@ -79,15 +79,16 @@ def filter_time(data: GTFSData, start_time: int, end_time: int) -> None: )] -def add_coordinates(data: GTFSData) -> None: +def add_coordinates(data: GTFSData, epsg: int = 27700) -> None: """Add BNG coordinates to the stop and stoptime tables. Args: data (Data): Data object. + epsg (int): The target coordinate system """ transformer = pyproj.Transformer.from_crs( pyproj.transformer.CRS('epsg:4326'), - pyproj.transformer.CRS('epsg:27700'), always_xy=True) + pyproj.transformer.CRS(f'epsg:{epsg}'), always_xy=True) data.stops['x'], data.stops['y'] = transformer.transform( data.stops['stop_lon'], data.stops['stop_lat'] @@ -139,22 +140,25 @@ def filter_bounding_box(data: GTFSData, xmin: int, xmax: int, ymin: int, ymax: i )] -def main(path_config: str) -> None: - """Run the preprocessing pipeline +def main(config: Config) -> GTFSData: + """Run the preprocessing pipeline and save resulting tables to disk. Args: - path_config (str): Path to the config file. + config (Config): Path Config object. + + Returns: + GTFSData: Pre-processed GTFS data object. 
""" - config = Config.from_yaml(path_config) logger = get_logger(os.path.join( config.path_outputs, 'log_preprocessing.log')) + logger.info('Reading files...') data = GTFSData.from_gtfs(path_gtfs=config.path_gtfs) logger.info('Time filtering..') filter_day(data, config.calendar_date) filter_time(data, config.start_s, config.end_s) - add_coordinates(data) + add_coordinates(data, epsg=config.epsg_centroids) if config.bounding_box is not None: logger.info('Cropping to bounding box..') @@ -164,3 +168,5 @@ def main(path_config: str) -> None: data.save(config.path_outputs) logger.info(f'Preprocessing complete.') + + return data diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py index d646cd5..802c6bf 100644 --- a/gtfs_skims/utils.py +++ b/gtfs_skims/utils.py @@ -94,6 +94,8 @@ class Config: k : 500 # max nearest neighbours when calculating distances max_wait : 1800 # sec | Max wait time at a stop bounding_box : null + epsg_centroids: 27700 # coordinate system of the centroids file. Needs to be Cartesian and in meters. 
+ steps: - preprocessing @@ -112,6 +114,7 @@ class Config: k: int end_s: int bounding_box: dict + epsg_centroids: int max_wait: int start_s: int walk_distance_threshold: int diff --git a/tests/conftest.py b/tests/conftest.py index 8f6127a..2022fb1 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -33,4 +33,4 @@ def config(): @pytest.fixture def gtfs_data(): - return GTFSData.from_gtfs(os.path.join(TEST_DATA_DIR, 'iow-bus-gtfs.zip')) + return GTFSData.from_gtfs(os.path.join(TEST_DATA_DIR, 'iow-bus-gtfs.zip')) \ No newline at end of file diff --git a/tests/test_connectors.py b/tests/test_connectors.py new file mode 100644 index 0000000..bd9de9a --- /dev/null +++ b/tests/test_connectors.py @@ -0,0 +1,55 @@ +import itertools + +import numpy as np +import pytest + +from gtfs_skims import connectors + + +@pytest.fixture() +def points(): + p = np.arange(-20, 20, 2.5) + coords = np.array([(x, y, z) for x, y, z in itertools.product(p, p, p)]) + return coords + + +def find_index(coords, x, y, z): + idx = np.where(np.all(coords == np.array([x, y, z]), axis=1))[0][0] + return idx + + +def get_valid_points(coords, source_idx, max_trasfer_dist): + dcoords = coords - coords[source_idx] + walk = (dcoords[:, :2]**2).sum(1)**0.5 # euclidean distance on xy + wait = dcoords[:, 2] - walk + + is_valid = (wait > 0) & ((walk+wait) <= max_trasfer_dist) + + return is_valid + + +@pytest.mark.parametrize('source', [(0, 0, 0), (2.5, 2.5, 2.5), (-2.5, 0, 2.5)]) +def test_query_all_valid_included(points, source): + """ All valid points are included in the query results """ + source_idx = find_index(points, *source) + maxdist = 10 + radius = maxdist * (2**0.5) + # ods = connectors.query_pairs(points, radius) + ods = connectors.query_pairs_filter(points, radius) + is_valid = get_valid_points(points, source_idx, maxdist) + + ds = ods[ods[:, 0] == source_idx, 1] + assert is_valid[ds].sum() == is_valid.sum() + + +@pytest.mark.parametrize('source', [(0, 0, 0), (2.5, 2.5, 2.5), (-2.5, 0, 2.5)]) 
+def test_query_all_included_valid(points, source): + """ All results from the query are valid """ + source_idx = find_index(points, *source) + maxdist = 10 + radius = maxdist * (2**0.5) + ods = connectors.query_pairs_filter(points, radius) + is_valid = get_valid_points(points, source_idx, maxdist) + + ds = ods[ods[:, 0] == source_idx, 1] + assert all(is_valid[ds]) diff --git a/tests/test_data/config_demo.yaml b/tests/test_data/config_demo.yaml index 8bc2241..e5b6bdf 100644 --- a/tests/test_data/config_demo.yaml +++ b/tests/test_data/config_demo.yaml @@ -15,6 +15,7 @@ settings: k : 500 # max nearest neighbours when calculating distances max_wait : 1800 # sec | Max wait time at a stop bounding_box : null + epsg_centroids: 27700 # coordinate system of the centroids file. Needs to be Cartesian and in meters. steps: - preprocessing diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py index e5d34db..acba58e 100644 --- a/tests/test_preprocessing.py +++ b/tests/test_preprocessing.py @@ -1,5 +1,6 @@ import os from pathlib import Path +import pytest from gtfs_skims import preprocessing @@ -47,6 +48,12 @@ def test_within_bounding_box(gtfs_data): assert gtfs_data.stops['y'].min() > ymin assert gtfs_data.stops['y'].max() < ymax -def test_run_preprocessing(): - path_config = os.path.join(Path(__file__).parent, 'test_data', 'config_demo.yaml') - preprocessing.main(path_config) + +def test_run_preprocessing_demo(config, tmpdir): + path_outputs = os.path.join(tmpdir, 'outputs') + config.path_outputs = path_outputs + preprocessing.main(config) + for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + assert os.path.exists( + os.path.join(path_outputs, f'{x}.parquet.gzip') + ) From f83d44dbbcf7e830d6df2cdfcfe2d61fb1695983 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 8 Dec 2023 09:18:00 +0000 Subject: [PATCH 07/30] connectors tests --- gtfs_skims/connectors.py | 20 +++++++++++++------- tests/test_connectors.py | 5 ++--- 2 files 
changed, 15 insertions(+), 10 deletions(-) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index c52dda3..4b371c2 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -6,27 +6,33 @@ from gtfs_skims.utils import Config, GTFSData, get_logger -def query_pairs(coords: np.array, maxdist: float) -> np.array: +def query_pairs(coords: np.array, radius: float) -> np.array: ids = coords[:, 2].argsort() dtree = KDTree(coords[ids]) - connectors = dtree.query_pairs(r=maxdist, output_type='ndarray', p=2) + connectors = dtree.query_pairs(r=radius, output_type='ndarray', p=2) return ids[connectors] def query_pairs_filter(coords: np.array, maxdist: float) -> np.array: - ods = query_pairs(coords, maxdist) - coords_o = coords[ods[:, 0]] - coords_d = coords[ods[:, 1]] + radius = maxdist * (2**0.5) + connectors = query_pairs(coords, radius) + coords_o = coords[connectors[:, 0]] + coords_d = coords[connectors[:, 1]] dcoords = coords_d - coords_o walk = (dcoords[:, :2]**2).sum(1)**0.5 # euclidean distance on xy wait = dcoords[:, 2] - walk - cond = (wait > 0) & ((walk+wait) <= maxdist) + is_feasible = (wait > 0) & ((walk+wait) <= maxdist) + connectors = connectors[is_feasible] - return ods[cond] + return connectors + + +def get_transfer_connectors(data: GTFSData, config: Config): + pass def get_access_connectors(data: GTFSData, config: Config): diff --git a/tests/test_connectors.py b/tests/test_connectors.py index bd9de9a..386977a 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -34,7 +34,6 @@ def test_query_all_valid_included(points, source): source_idx = find_index(points, *source) maxdist = 10 radius = maxdist * (2**0.5) - # ods = connectors.query_pairs(points, radius) ods = connectors.query_pairs_filter(points, radius) is_valid = get_valid_points(points, source_idx, maxdist) @@ -47,8 +46,8 @@ def test_query_all_included_valid(points, source): """ All results from the query are valid """ source_idx = find_index(points, *source) 
maxdist = 10 - radius = maxdist * (2**0.5) - ods = connectors.query_pairs_filter(points, radius) + + ods = connectors.query_pairs_filter(points, maxdist) is_valid = get_valid_points(points, source_idx, maxdist) ds = ods[ods[:, 0] == source_idx, 1] From cc2072dfca6de492d72a535b2fb4000dbf97acbb Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 8 Dec 2023 11:38:38 +0000 Subject: [PATCH 08/30] transfer object --- gtfs_skims/connectors.py | 107 ++++++++++++++++++++++++++++++++++----- tests/test_connectors.py | 17 +++++-- 2 files changed, 105 insertions(+), 19 deletions(-) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index 4b371c2..a4e4afc 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -1,4 +1,8 @@ +from __future__ import annotations +from dataclasses import dataclass +from functools import cached_property import os +from typing import Optional import numpy as np from scipy.spatial import KDTree @@ -7,6 +11,17 @@ def query_pairs(coords: np.array, radius: float) -> np.array: + """Get origin-destination pairs between points, within a radius. + The connections are forward-looking in z: ie the destination point + has always greater z coordinate than the origin point. + + Args: + coords (np.array): Point coordinates (x, y, z) + radius (float): Maximum distance between points + + Returns: + np.array: Feasible connections between points. 
+ """ ids = coords[:, 2].argsort() dtree = KDTree(coords[ids]) @@ -15,20 +30,84 @@ def query_pairs(coords: np.array, radius: float) -> np.array: return ids[connectors] -def query_pairs_filter(coords: np.array, maxdist: float) -> np.array: - radius = maxdist * (2**0.5) - connectors = query_pairs(coords, radius) - coords_o = coords[connectors[:, 0]] - coords_d = coords[connectors[:, 1]] - - dcoords = coords_d - coords_o - walk = (dcoords[:, :2]**2).sum(1)**0.5 # euclidean distance on xy - wait = dcoords[:, 2] - walk - - is_feasible = (wait > 0) & ((walk+wait) <= maxdist) - connectors = connectors[is_feasible] - - return connectors +@dataclass +class TransferConnector: + """ Manages transfer connectors. """ + coords: np.array + ods: np.array + # route_id: np.array + # service_id: np.array + + @cached_property + def ocoords(self) -> np.array: + """Origin coordinates. + + Returns: + np.array: x, y, z + """ + return self.coords[self.ods[:, 0]] + + @cached_property + def dcoords(self) -> np.array: + """Destination coordinates. + + Returns: + np.array: x, y, z + """ + return self.coords[self.ods[:, 1]] + + @cached_property + def walk(self) -> np.array: + """Walk distance (euclidean). + + Returns: + np.array: Distance from origin to destination point (on the xy axis). + """ + walk = ((self.dcoords[:, :2]-self.ocoords[:, :2])**2).sum(1)**0.5 + return walk + + @cached_property + def wait(self) -> np.array: + """Wait distance. It is calculated as the difference between timestamps (dz) + and the distance required to walk to the destination. + + Returns: + np.array: Wait distance. + """ + wait = self.dcoords[:, 2] - self.ocoords[:, 2] - self.walk + return wait + + def filter(self, cond: np.array[bool]) -> TransferConnector: + """Filter (in-place) Connnectors' origin-destination data based on a set of conditions. + + Args: + cond np.array[bool]: The boolean condition filter to use. + + Returns: + TransferConnector: Filtered Connectors object. 
+ """ + self.ods = self.ods[cond] + self.ocoords = self.ocoords[cond] + self.dcoords = self.dcoords[cond] + self.walk = self.walk[cond] + self.wait = self.wait[cond] + # self.route_id = self.route_id[cond] + # self.service_id = self.service_id[cond] + + return self + + def filter_feasible_transfer(self, maxdist): + is_feasible = (self.wait > 0) & ((self.walk+self.wait) <= maxdist) + return self.filter(is_feasible) + + def filter_max_walk(self, max_walk): + pass + + def filter_max_wait(self, max_wait): + pass + + def filter_same_route(self): + pass def get_transfer_connectors(data: GTFSData, config: Config): diff --git a/tests/test_connectors.py b/tests/test_connectors.py index 386977a..da01308 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -34,7 +34,7 @@ def test_query_all_valid_included(points, source): source_idx = find_index(points, *source) maxdist = 10 radius = maxdist * (2**0.5) - ods = connectors.query_pairs_filter(points, radius) + ods = connectors.query_pairs(points, radius) is_valid = get_valid_points(points, source_idx, maxdist) ds = ods[ods[:, 0] == source_idx, 1] @@ -46,9 +46,16 @@ def test_query_all_included_valid(points, source): """ All results from the query are valid """ source_idx = find_index(points, *source) maxdist = 10 - - ods = connectors.query_pairs_filter(points, maxdist) + radius = maxdist * (2**0.5) + + ods = connectors.query_pairs(points, radius) + tc = connectors.TransferConnector(points, ods).\ + filter_feasible_transfer(maxdist) + ods_filtered = tc.ods + is_valid = get_valid_points(points, source_idx, maxdist) - ds = ods[ods[:, 0] == source_idx, 1] - assert all(is_valid[ds]) + ds = ods_filtered[ods_filtered[:, 0] == source_idx, 1] + + assert is_valid[ds].sum() == is_valid.sum() + assert len(is_valid[ds]) > 0 and all(is_valid[ds]) From 665ee68549945f1407073878bb194b9892c0ce59 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 8 Dec 2023 18:11:43 +0000 Subject: [PATCH 09/30] transfer filtering 
--- gtfs_skims/connectors.py | 136 +++++++++++++++++++++++++------ tests/test_connectors.py | 85 ++++++++++++++++--- tests/test_data/config_demo.yaml | 2 +- 3 files changed, 183 insertions(+), 40 deletions(-) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index a4e4afc..44a160b 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -30,13 +30,13 @@ def query_pairs(coords: np.array, radius: float) -> np.array: return ids[connectors] -@dataclass -class TransferConnector: +class TransferConnectors: """ Manages transfer connectors. """ - coords: np.array - ods: np.array - # route_id: np.array - # service_id: np.array + + def __init__(self, coords: np.array, max_tranfer_distance: float) -> None: + self.coords = coords + radius = max_tranfer_distance * (2**0.5) + self.ods = query_pairs(coords, radius=radius) @cached_property def ocoords(self) -> np.array: @@ -77,41 +77,123 @@ def wait(self) -> np.array: wait = self.dcoords[:, 2] - self.ocoords[:, 2] - self.walk return wait - def filter(self, cond: np.array[bool]) -> TransferConnector: + def filter(self, cond: np.array[bool]) -> None: """Filter (in-place) Connnectors' origin-destination data based on a set of conditions. Args: cond np.array[bool]: The boolean condition filter to use. - - Returns: - TransferConnector: Filtered Connectors object. 
""" - self.ods = self.ods[cond] - self.ocoords = self.ocoords[cond] - self.dcoords = self.dcoords[cond] - self.walk = self.walk[cond] - self.wait = self.wait[cond] - # self.route_id = self.route_id[cond] - # self.service_id = self.service_id[cond] + ods = self.ods + ocoords = self.ocoords + dcoords = self.dcoords + walk = self.walk + wait = self.wait + + self.ods = ods[cond] + self.ocoords = ocoords[cond] + self.dcoords = dcoords[cond] + self.walk = walk[cond] + self.wait = wait[cond] return self - def filter_feasible_transfer(self, maxdist): + def filter_feasible_transfer(self, maxdist: float) -> None: + """Remove any connections with insufficient transfer time. + + + Args: + maxdist (float): Maximum transfer distance (walk+wait) + """ is_feasible = (self.wait > 0) & ((self.walk+self.wait) <= maxdist) - return self.filter(is_feasible) + self.filter(is_feasible) + + def filter_max_walk(self, max_walk: float) -> None: + """Remove any connections beyond a walk-distance threshold. - def filter_max_walk(self, max_walk): - pass + Args: + max_walk (float): Max walk distance + """ + cond = (self.walk <= max_walk) + self.filter(cond) - def filter_max_wait(self, max_wait): - pass + def filter_max_wait(self, max_wait: float) -> None: + """Remove any connections beyond a wait distance threshold. + + Args: + max_wait (float): Maximum stop (leg) wait time. + """ + self.filter(self.wait <= max_wait) - def filter_same_route(self): - pass + def filter_same_route(self, routes: np.array) -> None: + """Remove connections between services of the same route. + Args: + routes (np.array): Route IDs array. Its indexing matches the self.coords table. 
+ """ + self.filter( + routes[self.ods[:, 0]] != routes[self.ods[:, 1]] + ) -def get_transfer_connectors(data: GTFSData, config: Config): - pass + def filter_nearest_service(self, services: np.array) -> None: + """If a service can be accessed from a origin through multiple stops, + then only keep the most efficient transfer for that connection. + + Args: + services (np.array): Service IDs array. Its indexing must match the self.coords table. + """ + services_d = services[self.ods[:, 1]] # destination service + + # sort by trasfer distance + transfer = self.wait + self.walk + idx_sorted = transfer.argsort() + + # create origin-service combinations + order_o = int(np.floor(np.log10(services.max()))+1) + comb = (self.ods[:, 0]+1) * 10**order_o + services_d + + # get first instance of each origin-service combination + # (which corresponds to the most efficient transfer) + keep = idx_sorted[np.unique(comb[idx_sorted], return_index=True)[1]] + cond = np.isin(np.arange(len(comb)), keep) + + self.filter(cond) + + +def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: + time_to_distance = config.walk_speed/3.6 # km/hr to meters + max_tranfer_distance = config.max_transfer_time * time_to_distance + max_wait_distance = config.max_wait * time_to_distance + + # get candidate connectors + coords = data.stop_times[['x', 'y', 'departure_s']] + tc = TransferConnectors(coords, max_tranfer_distance) + + # apply narrower filters + tc.filter_feasible_transfer(max_tranfer_distance) + + if config.walk_distance_threshold < max_tranfer_distance: + tc.filter_max_walk() + + if max_wait_distance < max_tranfer_distance: + tc.filter_max_wait() + + routes = data.stop_times['trip_id'].map( + data.trips.set_index('trip_id')['route_id'] + ) + tc.filter_same_route(routes) + + services = data.stop_times['trip_id'].map( + data.trips.set_index('trip_id')['service_id'] + ) + tc.filter_nearest_service(services) + + arr = np.array([ + tc.ods[:, 0], # origin index + tc.ods[:, 1], # 
destination index + tc.walk, # walk distance (meters) + tc.wait/time_to_distance*3600 # wait time (seconds???) + ]) + return arr def get_access_connectors(data: GTFSData, config: Config): diff --git a/tests/test_connectors.py b/tests/test_connectors.py index da01308..ffdfff9 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -1,3 +1,4 @@ +from collections import defaultdict import itertools import numpy as np @@ -13,6 +14,11 @@ def points(): return coords +@pytest.fixture() +def transfer_connectors(points): + return connectors.TransferConnectors(points, 10) + + def find_index(coords, x, y, z): idx = np.where(np.all(coords == np.array([x, y, z]), axis=1))[0][0] return idx @@ -33,12 +39,13 @@ def test_query_all_valid_included(points, source): """ All valid points are included in the query results """ source_idx = find_index(points, *source) maxdist = 10 + is_valid = get_valid_points(points, source_idx, maxdist) + radius = maxdist * (2**0.5) ods = connectors.query_pairs(points, radius) - is_valid = get_valid_points(points, source_idx, maxdist) - ds = ods[ods[:, 0] == source_idx, 1] - assert is_valid[ds].sum() == is_valid.sum() + dest = ods[ods[:, 0] == source_idx, 1] + assert is_valid[dest].sum() == is_valid.sum() @pytest.mark.parametrize('source', [(0, 0, 0), (2.5, 2.5, 2.5), (-2.5, 0, 2.5)]) @@ -46,16 +53,70 @@ def test_query_all_included_valid(points, source): """ All results from the query are valid """ source_idx = find_index(points, *source) maxdist = 10 - radius = maxdist * (2**0.5) + is_valid = get_valid_points(points, source_idx, maxdist) - ods = connectors.query_pairs(points, radius) - tc = connectors.TransferConnector(points, ods).\ - filter_feasible_transfer(maxdist) - ods_filtered = tc.ods + tc = connectors.TransferConnectors(points, maxdist) + tc.filter_feasible_transfer(maxdist) + dest = tc.ods[tc.ods[:, 0] == source_idx, 1] - is_valid = get_valid_points(points, source_idx, maxdist) + assert is_valid[dest].sum() == is_valid.sum() + 
assert len(is_valid[dest]) > 0 and all(is_valid[dest]) + + +def test_filter_transfer_walk(transfer_connectors): + max_walk = 5 + assert transfer_connectors.walk.max() > max_walk + transfer_connectors.filter_max_walk(max_walk) + assert transfer_connectors.walk.max() <= max_walk + + +def test_filter_transfer_wait(transfer_connectors): + max_wait = 5 + assert transfer_connectors.wait.max() > max_wait + transfer_connectors.filter_max_wait(max_wait) + assert transfer_connectors.wait.max() <= max_wait + + +def test_filter_same_route(transfer_connectors): + # assume all even-to-even point ID are in the same route + routes = np.arange(len(transfer_connectors.coords)) + routes = np.where(routes % 2, -1, routes) + transfer_connectors.filter_same_route(routes) + assert (transfer_connectors.ods % 2).prod(1).sum() == 0 + + +def get_o_service_transfers(conn, services_d): + transfer_times = conn.wait + conn.walk + d = defaultdict(list) + for i in range(len(services_d)): + d[(conn.ods[i, 0], services_d[i]) + ].append(transfer_times[i]) + return d + + +def test_filter_nearest_service(transfer_connectors): + np.random.seed(0) + services = np.random.randint( + 0, 2, size=transfer_connectors.coords.shape[0]) + services_d = services[transfer_connectors.ods[:, 1]] + + # for every origin-service pair there are multiple connections + transfer_times = transfer_connectors.wait + transfer_connectors.walk + d_before = get_o_service_transfers(transfer_connectors, services_d) + + assert max(map(len, d_before.values())) > 0 + + # after filtering, there is only one and it is the + # one with the minumum transfer time. 
+ transfer_connectors.filter_nearest_service(services) + services_d = services[transfer_connectors.ods[:, 1]] + + d_after = get_o_service_transfers(transfer_connectors, services_d) - ds = ods_filtered[ods_filtered[:, 0] == source_idx, 1] + # didn't lose any origin-service pairs + assert len(d_before) == len(d_after) + # single connection per origin-service + assert max(map(len, d_after.values())) == 1 - assert is_valid[ds].sum() == is_valid.sum() - assert len(is_valid[ds]) > 0 and all(is_valid[ds]) + for o, service in d_before.keys(): + d_after[(o, service)][0] == min(d_before[(o, service)]) diff --git a/tests/test_data/config_demo.yaml b/tests/test_data/config_demo.yaml index e5b6bdf..5fdc0ab 100644 --- a/tests/test_data/config_demo.yaml +++ b/tests/test_data/config_demo.yaml @@ -13,7 +13,7 @@ settings: crows_fly_factor : 1.3 # Conversion factor from euclidean to routed distances max_transfer_time : 1800 # Max combined time of walking and waiting (sec) k : 500 # max nearest neighbours when calculating distances - max_wait : 1800 # sec | Max wait time at a stop + max_wait : 1800 # sec | Max wait time at a stop / leg bounding_box : null epsg_centroids: 27700 # coordinate system of the centroids file. Needs to be Cartesian and in meters. 
From 8eb8eee2c0b29bf226cf5027916105d59b6f1cee Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 8 Dec 2023 18:37:50 +0000 Subject: [PATCH 10/30] handle transfers --- gtfs_skims/connectors.py | 37 +++++++++++++++++++++---------------- tests/test_connectors.py | 13 +++++++++++-- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index 44a160b..48477b7 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -10,7 +10,7 @@ from gtfs_skims.utils import Config, GTFSData, get_logger -def query_pairs(coords: np.array, radius: float) -> np.array: +def query_pairs(coords: np.ndarray, radius: float) -> np.array: """Get origin-destination pairs between points, within a radius. The connections are forward-looking in z: ie the destination point has always greater z coordinate than the origin point. @@ -33,7 +33,7 @@ def query_pairs(coords: np.array, radius: float) -> np.array: class TransferConnectors: """ Manages transfer connectors. """ - def __init__(self, coords: np.array, max_tranfer_distance: float) -> None: + def __init__(self, coords: np.ndarray, max_tranfer_distance: float) -> None: self.coords = coords radius = max_tranfer_distance * (2**0.5) self.ods = query_pairs(coords, radius=radius) @@ -77,7 +77,7 @@ def wait(self) -> np.array: wait = self.dcoords[:, 2] - self.ocoords[:, 2] - self.walk return wait - def filter(self, cond: np.array[bool]) -> None: + def filter(self, cond: np.ndarray[bool]) -> None: """Filter (in-place) Connnectors' origin-destination data based on a set of conditions. Args: @@ -124,7 +124,7 @@ def filter_max_wait(self, max_wait: float) -> None: """ self.filter(self.wait <= max_wait) - def filter_same_route(self, routes: np.array) -> None: + def filter_same_route(self, routes: np.ndarray) -> None: """Remove connections between services of the same route. 
Args: @@ -134,7 +134,7 @@ def filter_same_route(self, routes: np.array) -> None: routes[self.ods[:, 0]] != routes[self.ods[:, 1]] ) - def filter_nearest_service(self, services: np.array) -> None: + def filter_nearest_service(self, services: np.ndarray) -> None: """If a service can be accessed from a origin through multiple stops, then only keep the most efficient transfer for that connection. @@ -165,34 +165,39 @@ def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: max_wait_distance = config.max_wait * time_to_distance # get candidate connectors - coords = data.stop_times[['x', 'y', 'departure_s']] + coords = data.stop_times[['x', 'y', 'departure_s']].values tc = TransferConnectors(coords, max_tranfer_distance) # apply narrower filters + # enough time to make transfer tc.filter_feasible_transfer(max_tranfer_distance) + # maximum walk if config.walk_distance_threshold < max_tranfer_distance: - tc.filter_max_walk() + tc.filter_max_walk(config.walk_distance_threshold) + # maximum wait if max_wait_distance < max_tranfer_distance: - tc.filter_max_wait() + tc.filter_max_wait(max_wait_distance) + # not same route routes = data.stop_times['trip_id'].map( data.trips.set_index('trip_id')['route_id'] - ) + ).values tc.filter_same_route(routes) + # most efficient transfer to service services = data.stop_times['trip_id'].map( data.trips.set_index('trip_id')['service_id'] - ) + ).values tc.filter_nearest_service(services) - arr = np.array([ - tc.ods[:, 0], # origin index - tc.ods[:, 1], # destination index - tc.walk, # walk distance (meters) - tc.wait/time_to_distance*3600 # wait time (seconds???) 
- ]) + # construct array + arr = np.concatenate([ + tc.ods, # origin and destination index + (tc.walk/time_to_distance).reshape(-1, 1), # walk time (seconds) + (tc.wait/time_to_distance).reshape(-1, 1) # wait time (seconds) + ], axis=1).round(1).astype(np.uint32) return arr diff --git a/tests/test_connectors.py b/tests/test_connectors.py index ffdfff9..736f914 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -1,10 +1,11 @@ from collections import defaultdict import itertools +import os import numpy as np import pytest -from gtfs_skims import connectors +from gtfs_skims import connectors, preprocessing @pytest.fixture() @@ -101,7 +102,6 @@ def test_filter_nearest_service(transfer_connectors): services_d = services[transfer_connectors.ods[:, 1]] # for every origin-service pair there are multiple connections - transfer_times = transfer_connectors.wait + transfer_connectors.walk d_before = get_o_service_transfers(transfer_connectors, services_d) assert max(map(len, d_before.values())) > 0 @@ -120,3 +120,12 @@ def test_filter_nearest_service(transfer_connectors): for o, service in d_before.keys(): d_after[(o, service)][0] == min(d_before[(o, service)]) + + +def test_get_transfer_array(config, tmpdir): + path_outputs = os.path.join(tmpdir, 'outputs') + config.path_outputs = path_outputs + gtfs_data = preprocessing.main(config) + arr = connectors.get_transfer_connectors(gtfs_data, config) + assert len(arr) > 0 + assert isinstance(arr, np.ndarray) \ No newline at end of file From 47bc57c36094f196029a427508dd4370543434e8 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Mon, 11 Dec 2023 14:23:29 +0000 Subject: [PATCH 11/30] access egress --- .gitignore | 2 +- gtfs_skims/cli.py | 23 ++- gtfs_skims/connectors.py | 151 ++++++++++++++++-- gtfs_skims/preprocessing.py | 2 +- gtfs_skims/utils.py | 1 - gtfs_skims/variables.py | 4 + tests/conftest.py | 8 +- tests/test_cli.py | 1 - tests/test_connectors.py | 46 +++++- 
tests/test_data/config_demo.yaml | 1 - tests/test_data/outputs/calendar.parquet.gzip | Bin 0 -> 3098 bytes tests/test_data/outputs/routes.parquet.gzip | Bin 0 -> 2431 bytes .../test_data/outputs/stop_times.parquet.gzip | Bin 0 -> 8296 bytes tests/test_data/outputs/stops.parquet.gzip | Bin 0 -> 8421 bytes tests/test_data/outputs/trips.parquet.gzip | Bin 0 -> 1475 bytes 15 files changed, 213 insertions(+), 26 deletions(-) create mode 100644 tests/test_data/outputs/calendar.parquet.gzip create mode 100644 tests/test_data/outputs/routes.parquet.gzip create mode 100644 tests/test_data/outputs/stop_times.parquet.gzip create mode 100644 tests/test_data/outputs/stops.parquet.gzip create mode 100644 tests/test_data/outputs/trips.parquet.gzip diff --git a/.gitignore b/.gitignore index fb87f1a..5f9b874 100755 --- a/.gitignore +++ b/.gitignore @@ -40,4 +40,4 @@ mike-*.yml .ipynb_checkpoints sandbox.py -tests/test_data/outputs/ \ No newline at end of file +tests/test_data/outputs/*log \ No newline at end of file diff --git a/gtfs_skims/cli.py b/gtfs_skims/cli.py index 0707407..328bd9e 100755 --- a/gtfs_skims/cli.py +++ b/gtfs_skims/cli.py @@ -3,12 +3,29 @@ import click +from gtfs_skims.preprocessing import main as main_preprocessing +from gtfs_skims.connectors import main as main_connectors +from gtfs_skims.utils import Config + + @click.version_option(package_name="gtfs_skims") -@click.command() +@click.group def cli(args=None): """Console script for gtfs_skims.""" click.echo( - "Replace this message by putting your code into gtfs_skims.cli.cli" + "Console script for Argo (gtfs_skims)." 
) - click.echo("See click documentation at https://click.palletsprojects.com/") return 0 + + +@cli.command() +@click.argument('config_path') +def run(config_path: str): + config = Config.from_yaml(config_path) + steps = config.steps + + if 'preprocessing' in steps: + main_preprocessing(config=config) + + if 'connectors' in steps: + main_connectors(config=config) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index 48477b7..8d15bfd 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -6,8 +6,10 @@ import numpy as np from scipy.spatial import KDTree +import pandas as pd from gtfs_skims.utils import Config, GTFSData, get_logger +from gtfs_skims.variables import DATA_TYPE def query_pairs(coords: np.ndarray, radius: float) -> np.array: @@ -159,6 +161,74 @@ def filter_nearest_service(self, services: np.ndarray) -> None: self.filter(cond) +def query_pairs_od( + coords_origins: np.ndarray, + coords_destinations: np.ndarray, + radius: float +) -> np.array: + """Get origin-destination pairs between points, within a radius. + + Args: + coords_origins (np.array): Coordinates of origin points + coords_destinations (np.array): Coordinates of destination points + radius (float): Maximum distance between points + + Returns: + np.array: Feasible connections between points. 
+ """ + tree_origins = KDTree(coords_origins) + tree_destinations = KDTree(coords_destinations) + + ods = tree_origins.query_ball_tree( + tree_destinations, r=radius) + + # flatten + ods = np.column_stack([ + np.repeat(range(len(coords_origins)), list(map(len, ods))), + np.concatenate(ods) + ]).astype(DATA_TYPE) + + return ods + + +class AccessEgressConnectors(TransferConnectors): + """ Connections between zones/endpoints and stops """ + + def __init__( + self, + coords_origins: np.ndarray, + coords_destinations: np.ndarray, + max_tranfer_distance: float + ) -> None: + self.coords_origins = coords_origins + self.coords_destinations = coords_destinations + + radius = max_tranfer_distance + if coords_origins.shape[1] == 3: + radius += max_tranfer_distance * (2**0.5) + + self.ods = query_pairs_od(coords_origins, coords_destinations, + radius=radius) + + @cached_property + def ocoords(self) -> np.array: + """Origin coordinates. + + Returns: + np.array: x, y (, z) + """ + return self.coords_origins[self.ods[:, 0]] + + @cached_property + def dcoords(self) -> np.array: + """Destination coordinates. 
+ + Returns: + np.array: x, y (,z) + """ + return self.coords_destinations[self.ods[:, 1]] + + def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: time_to_distance = config.walk_speed/3.6 # km/hr to meters max_tranfer_distance = config.max_transfer_time * time_to_distance @@ -168,7 +238,7 @@ def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: coords = data.stop_times[['x', 'y', 'departure_s']].values tc = TransferConnectors(coords, max_tranfer_distance) - # apply narrower filters + # apply more narrow filters: # enough time to make transfer tc.filter_feasible_transfer(max_tranfer_distance) @@ -197,30 +267,89 @@ def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: tc.ods, # origin and destination index (tc.walk/time_to_distance).reshape(-1, 1), # walk time (seconds) (tc.wait/time_to_distance).reshape(-1, 1) # wait time (seconds) - ], axis=1).round(1).astype(np.uint32) + ], axis=1).round(1).astype(DATA_TYPE) + return arr -def get_access_connectors(data: GTFSData, config: Config): - # ... query ball tree - pass +def get_access_connectors(data: GTFSData, config: Config, coords_origins: np.ndarray): + time_to_distance = config.walk_speed/3.6 # km/hr to meters + max_tranfer_distance = config.max_transfer_time * time_to_distance + max_wait_distance = config.max_wait * time_to_distance + # get candidate connectors + coords_stops = data.stop_times[['x', 'y', 'departure_s']].values + ac = AccessEgressConnectors( + coords_origins, coords_stops, max_tranfer_distance) -def get_egress_connectors(data: GTFSData, config: Config): - # ... 
query ball tree - pass + # more narrow filtering + ac.filter_feasible_transfer(max_tranfer_distance) + if config.walk_distance_threshold < max_tranfer_distance: + ac.filter_max_walk(config.walk_distance_threshold) + if max_wait_distance < max_tranfer_distance: + ac.filter_max_wait(max_wait_distance) + arr = np.concatenate([ + ac.ods, # origin and destination index + (ac.walk/time_to_distance).reshape(-1, 1), # walk time (seconds) + (ac.wait/time_to_distance).reshape(-1, 1) # wait time (seconds) + ], axis=1).round(1).astype(DATA_TYPE) -def main(data: GTFSData, config: Config): + return arr + + +def get_egress_connectors(data: GTFSData, config: Config, coords_destinations: np.ndarray): + time_to_distance = config.walk_speed/3.6 # km/hr to meters + + # get candidate connectors + coords_stops = data.stop_times[['x', 'y']].values + ec = AccessEgressConnectors( + coords_stops, coords_destinations, config.walk_distance_threshold) + + arr = np.concatenate([ + ec.ods, # origin and destination index + (ec.walk/time_to_distance).reshape(-1, 1), # walk time (seconds) + np.array([0]*len(ec.ods)).reshape(-1, 1) # wait time = 0 + ], axis=1).round(1).astype(DATA_TYPE) + + return arr + + +def main(data: GTFSData, config: Config) -> tuple[TransferConnectors, AccessEgressConnectors, AccessEgressConnectors]: logger = get_logger(os.path.join( config.path_outputs, 'log_connectors.log')) + coords_origins = pd.read_csv(config.path_origins) + coords_destinations = pd.read_csv(config.path_destinations) # get feasible connections logger.info('Getting transfer connectors...') transfer_connectors = get_transfer_connectors(data, config) logger.info('Getting access connectors...') - access_connectors = get_access_connectors(data, config) + access_connectors = get_access_connectors(data, config, coords_origins) logger.info('Getting egress connectors...') - egress_connectors = get_egress_connectors(data, config) + egress_connectors = get_egress_connectors( + data, config, coords_destinations) + + 
# convert to dataframe + colnames = ['onode', 'dnode', 'walk', 'wait'] + transfer_connectors = pd.DataFrame(transfer_connectors, columns=colnames) + access_connectors = pd.DataFrame(access_connectors, columns=colnames) + egress_connectors = pd.DataFrame(egress_connectors, columns=colnames) + + # offset IDs for endpoints + access_connectors['onode'] += (len(data.stop_times)+1) + egress_connectors['dnode'] += (len(data.stop_times)+len(coords_origins)+2) # save + logger.info(f'Saving connectors to f{config.path_outputs}...') + transfer_connectors.to_parquet( + os.path.join(config.path_outputs, 'connectors_transfer.parquet') + ) + access_connectors.to_parquet( + os.path.join(config.path_outputs, 'connectors_access.parquet') + ) + egress_connectors.to_parquet( + os.path.join(config.path_outputs, 'connectors_egress.parquet') + ) + + return transfer_connectors, access_connectors, egress_connectors diff --git a/gtfs_skims/preprocessing.py b/gtfs_skims/preprocessing.py index f637d06..419e82e 100644 --- a/gtfs_skims/preprocessing.py +++ b/gtfs_skims/preprocessing.py @@ -144,7 +144,7 @@ def main(config: Config) -> GTFSData: """Run the preprocessing pipeline and save resulting tables to disk. Args: - config (Config): Path Config object. + config (Config): Config object. Returns: GTFSData: Pre-processed GTFS data object. 
diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py index 802c6bf..3482a17 100644 --- a/gtfs_skims/utils.py +++ b/gtfs_skims/utils.py @@ -111,7 +111,6 @@ class Config: calendar_date: int crows_fly_factor: float max_transfer_time: int - k: int end_s: int bounding_box: dict epsg_centroids: int diff --git a/gtfs_skims/variables.py b/gtfs_skims/variables.py index f1e9591..c6ded4e 100644 --- a/gtfs_skims/variables.py +++ b/gtfs_skims/variables.py @@ -1,3 +1,7 @@ +import numpy as np + +DATA_TYPE = np.uint32 + # route types lookup # source: https://developers.google.com/transit/gtfs/reference#routestxt # and https://developers.google.com/transit/gtfs/reference/extended-route-types diff --git a/tests/conftest.py b/tests/conftest.py index 2022fb1..bc546af 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -31,6 +31,12 @@ def response(): def config(): return Config.from_yaml(os.path.join(TEST_DATA_DIR, 'config_demo.yaml')) + @pytest.fixture def gtfs_data(): - return GTFSData.from_gtfs(os.path.join(TEST_DATA_DIR, 'iow-bus-gtfs.zip')) \ No newline at end of file + return GTFSData.from_gtfs(os.path.join(TEST_DATA_DIR, 'iow-bus-gtfs.zip')) + + +@pytest.fixture +def gtfs_data_preprocessed(): + return GTFSData.from_parquet(os.path.join(TEST_DATA_DIR, 'outputs')) diff --git a/tests/test_cli.py b/tests/test_cli.py index cb08971..f5a59d0 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -10,7 +10,6 @@ def test_command_line_interface(): runner = CliRunner() result = runner.invoke(cli.cli) assert result.exit_code == 0 - assert "gtfs_skims.cli.cli" in result.output help_result = runner.invoke(cli.cli, ["--help"]) assert help_result.exit_code == 0 assert ( diff --git a/tests/test_connectors.py b/tests/test_connectors.py index 736f914..2922397 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -4,6 +4,7 @@ import numpy as np import pytest +import unittest.mock as mock from gtfs_skims import connectors, preprocessing @@ -122,10 +123,43 @@ def 
test_filter_nearest_service(transfer_connectors): d_after[(o, service)][0] == min(d_before[(o, service)]) -def test_get_transfer_array(config, tmpdir): - path_outputs = os.path.join(tmpdir, 'outputs') - config.path_outputs = path_outputs - gtfs_data = preprocessing.main(config) - arr = connectors.get_transfer_connectors(gtfs_data, config) +def test_get_transfer_array(gtfs_data_preprocessed, config): + arr = connectors.get_transfer_connectors(gtfs_data_preprocessed, config) assert len(arr) > 0 - assert isinstance(arr, np.ndarray) \ No newline at end of file + assert isinstance(arr, np.ndarray) + + +def test_get_od_pairs(): + ods = connectors.query_pairs_od( + np.array([[0, 0], [1, 1]]), + np.array([[0.5, 0.5], [2, 1], [2, 2]]), + radius=1 + ) + expected = np.array([ + [0, 0], + [1, 0], + [1, 1] + ]) + np.testing.assert_equal(ods, expected) + + +def test_get_od_walk(): + egress = connectors.AccessEgressConnectors( + np.array([[0, 0], [1, 1]]), + np.array([[0.5, 0.5], [2, 1], [2, 2]]), + max_tranfer_distance=1 + ) + walk = egress.walk + expected = np.array([ + (2*0.5**2)**0.5, (2*0.5**2)**0.5, 1 + ]) + np.testing.assert_almost_equal(walk, expected) + + +def test_convert_distance_3d(): + egress = connectors.AccessEgressConnectors( + np.array([[0, 0, 0]]), + np.array([[1, 1, 1]]), + max_tranfer_distance=1 + ) + assert len(egress.ods) == 1 # radius has been adjusted to 3D space diff --git a/tests/test_data/config_demo.yaml b/tests/test_data/config_demo.yaml index 5fdc0ab..4196b30 100644 --- a/tests/test_data/config_demo.yaml +++ b/tests/test_data/config_demo.yaml @@ -12,7 +12,6 @@ settings: walk_speed : 4.5 # kph | Walking speed crows_fly_factor : 1.3 # Conversion factor from euclidean to routed distances max_transfer_time : 1800 # Max combined time of walking and waiting (sec) - k : 500 # max nearest neighbours when calculating distances max_wait : 1800 # sec | Max wait time at a stop / leg bounding_box : null epsg_centroids: 27700 # coordinate system of the centroids 
file. Needs to be Cartesian and in meters. diff --git a/tests/test_data/outputs/calendar.parquet.gzip b/tests/test_data/outputs/calendar.parquet.gzip new file mode 100644 index 0000000000000000000000000000000000000000..f760856622100722a9d0176c217c983ebdc2fdde GIT binary patch literal 3098 zcmcJS!E4h{9LHa~F3UPZ5ff4^8geKTy3%e#hNvit4Dm2_@Suz?ZCP7Hx5Qcb|2XC7q!obUz7jI649R+Fp*n~AYe~ zy$JG)=G+Fa6(`?*Edp@%>C4m9VlqNyT%Y@hRd2pw(a$eK7kyDOvoXmz)p=;Df(V3^ z&`JnNsb9A=xJ+-;4z!o_DlF?7o#V{9rKwGNKY(25uoHt%pk{@qcr@?Yp&+hi>#T@( zROH(14ClBZpR|Ki8f&^1CB5Ahpa@5hWl*&X#%(&hpNSyc?7%^iiXM+wY2BoqVZvFA~;yJP;V zl=ur|)RV`~K#&JrxrDKR89M`*@$B;iVFaTvLwr9da(K~7TtmhC!;yXutti#H;hI+2 z-@MRKuF8*Al$s6=t!$|##P^nK7=A!M`sJMpy zDSC()DCKoS*UxNTl;r|N$18Fz(m;e9( literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/routes.parquet.gzip b/tests/test_data/outputs/routes.parquet.gzip new file mode 100644 index 0000000000000000000000000000000000000000..a5b796d61c9c1693d31c481f9d1d4be7d34aff6a GIT binary patch literal 2431 zcmWG=3^EjD5X}*d(h=nXGT1~p7#QTcfdmNr4?Vh*lYxQbz=0nP5B@RVUh;94cLMX$ z>D&45GnQ@=^)Lgf0qah}s@wGFMqUO67M26`OEs@1arW+1{j|=K`2o+PC3<(OzC0=w(-_1Pz`zjQZ+1WrVtI&Y1b(eO|*46v?pBC=cyru6Z z1qukyRtXBoar$THbs~k=s$$RMbH`?!e!1>G>ymZ-EN1=>gqEFP5v{Qd`otu^XmynS zx{8oJ&z%qEXVwI2UJTwUmHJ?g)53j6{S%Mn9(Ys~e37e|y}r{U_kC#L%x#6mcesws ze4MZm;<`P|q9?GsF4T7~7n32+x<0*qhdMVm&n{*y6=2UV;xW1LIEyJWd*^m@L0LWL zA9a(L7Uyk^c)jmX$K7?Wl`qU-yS1w1Nn7OBVo@f}t~?!8v$VtU<%>7|7Fko-sL9Bx z{iCtSal@3E)x4GKGzGf7_$P>87ZRPvQk)YI@a)Lpk`0p#`pb4LGxyT-`F7&;ho1UN z?$g>d&;EL6F2DC@;>3HfluOG&MOb2*CgOG{GYGgBBuc~qGsI1|%T^O7rp zd{tmt1c?Yh6clIV7nQ{4CFZ8WB={i`Ir(|%DB@fY@s!l!WROy*wNTNL%7Rn|F)m3N z$r?2d8BrEVQ3gqxWxJx#cCuO#5imuIM{*S2LTCa?qOgMV_=Y!0cH>eQ6`|c zS_>o00JtY$X0$K@4QXKnn$pGyHUwx&0+fYjN(YmSC=b{f2)~1ksS_&$8q&dJ0}M=L zYyZJDU<3_VUzu1HP~SfceLt9C7NLh6SX&3P*d8XJ#vjb+7H(mKX+#QIu%1WEV(&nD zwy>e=S;Pqo8>D~->j8!^$X|;%fq}ye2^(PGz*wk(Bi6wq#s&1e1Y1F3UP@xI#AA+X zrR4ma(%igwa573QRhG!>jP%TkMqGxPI6GDZeQ#(IVZdWN7R1IbeH 
zNZAsa$v{R!voo?@JwrWXP%r|GF3Btbnux45ww6Ic56NQPg36MN{5%Dy0Sa)N6*Q7c TGjmcD3^W;DvoSCP0GnO_oqf%} literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/stop_times.parquet.gzip b/tests/test_data/outputs/stop_times.parquet.gzip new file mode 100644 index 0000000000000000000000000000000000000000..29ecd67f7f10a602c6edc3eeb7076283075ac3df GIT binary patch literal 8296 zcmeHNc{r5qzqb_%lVungONfeSvJD2U6tbi&uXQX*c4He`MOj138)BjeEwZ%8GK^&y zy;+K~WE<<)2ZNcho{?xdz5RaYyyv>kALl%mYp#3l`}_HP?)&?E?(cJ7&oDT876POL zriufbnWcdhOtkiPAPXIxa18BZ{aRC~IiAQ%YgDik&*NM=Er=HcRUA7DU2|PKeG69& zBHWYT&9Uy-F*ere>|#&>jVCkd&xz|V@aWIW>yz1<=A@ezz)kb8CNhKUoT%&q7qcq% zAqS#J3_Q9|EAUAove?8w(**gt&Hs5Dvdj)V4#lZfM(KIIFhY&VXRB7csxkF{jii{?t*k)Jf&k zabzlSo7KoktBJiFzLCcThds2Q&4zIcwdGfpkT2i(KYN2L_4Cj6L%t#V=aG>Z0ObLI znjcSj98WFMr##Z9zG|Z6G*QvAlyq6@i#*EHJZgzMCCi;!NTlQvspZ0yhr-l?G|H1S zYOysX)0+Ca7g}2ohd^_qM#LpmYo6#KO5{+Z(vGUQ;zmUIPKyyyzEW(a45C=dV)TGX zY3=J=M43R>h=fvU&2u+IscP4#j9)2EU~yaAlrS{p2%bq&kfv=>lM#l5WJBz6uCMAD;%JqCcZb96xRPwgL>7j*A$0e$-C~U8FLIN@ba|Q zNR8tXRn0uhAn;w2cr7%4Je#TqhMAm!(_Zhv&h%_hVB zxh-(wE?~AXGeAqlxKi*LKJKo;D!OBw?L`YI?ykL6EbA0W2RGIrWqi9>a=wu#M_IBJ zB`Eba=(tcEvp#dDF}QO-s90N>H?S`_Hcf*NM7ZQZFs1#WT!p5q+Hb9y5*{ElgCv3= zf1=+jYxFrN1WW-6hhx#lsbJV>F<|yq*z?9b$zTBm$FMdOdU)YwTP<^*S2S)F{LRQ}-dw9Z#u`blMG7H&`v}b@xY+Gv`V= zUPBv^SAu2Z^k?IOtF^p%({#r13p%qFua(S94wiK!k51idV(cPBffquA%H7mG zR&xkGvYAlF?p!%%s*>5LptX+((p=#n^@^sVF^A-x^04KeWQn%2+xG*8!qK}2D_iG= z8#`O&k!O3w0fkv_Ge;Y&lsl3ojLuv$y`#d(K>!2R8q<(c&KL zde&Niq3}Y9>W{H4`ml1ZK!T3O7>ZRBV=xyP9~=!!k~1eW9908MS%KBIQ8T9T8QCs? 
zse1x=e>BT2L z#4pTy*WaNk)#+J5E8V6;7n1qc+7`*evuB!j$FpzjWkq2A|B~qbiMsz} z$~MHV|KDn~UHHGS)$YIP;~a!FI91;Y40C)Zs0|)gNVPK+$;rx7f2KGg{AJ-D3X3&D^w4V6g-GlQ51IELbA}}0W z`D^N&`lHL!j={_B0V~rfH_s3pM=03SZ~JasPCeH_ZMfV!1h1b1!5G$@@4|%MR z7M*fYR&Z6Uk-qWAnF8pD8D^EP&d`nZeTPBG;vEj3)VLcaC$b!24 zu-_FY=)HyakIZ0WC)-HTq?Dr5Tqh@vd`Q-nV4%F8kxq{ijhr`?-5xFN*3Z&ozpwed z0ciGMYo1;#@2s3nn4KBUxOhFR7Ls5>>E&5i${5W3W35 zMBrHDn4MV4o*l$D_(qhcy-6rBtFsfCVy#64nz%hw`S7rZ0aVf=>M-?w57sl7++FMe z;efUsck%Cr$U%4t{}leGW)Ss8Tu^eeuzQ!?VtMlVptRaTZTVCeHy5`S&Lz zOWwx{&7})7g0Q>7A)$sh;9%|~EKe3B%&ZV++}`{9K5Ve5DD)x4F#z&3^UyWO>R64#v$mX>w(x z-^4bH9MDwZ8nrzMcW(|gv?_`mNw;yhb0n~AxAW+Wqf(B__jM*gh%)4!*X;WlV)yp& zoRoUVe@;A-^Yp7oWzmr}H6?(*o4OLAA4MP!kfjfS*u=IWueY(UrM+yA$w;czFzu0O z?JK#`9-%RPsT3M@SLp;_4~|v7hv!)VJ+WU7RRt96=X9y|Fp&4&f%Dk)()7dLRy!u# z^iCP}sBuqOl>)}g{RwWKOc7Q6Pt9%FRSLE#1jQ&zPYea$fQO_N@e6qFvhHKcMITI_ zp-Zh5s_)l31IGQ~<()22IdTl?dTHC89BBFp3)6d}rfbT$B9QwW}STqfEOTvH=dz-kT&P0;B# zoU_>_M&XD_sWI|rZZZ8JS~9J0A^)S4&E}odLm2xLFN!J54&KMA-F<1!7theRB5;zs&lMg zr7KL&1(#M-tOKsC%rBbFGL6-9(PZzn<*@c91;>M`m#sXx74dZCRZbS3I`RV{u`NZIjG!^VK{Ou#&KOcNIVe=cGn^l--k#6V-5zy3 z3A;b%k!qC(FjCL0tns!63tbnRd%NHPIf+Bl8q*llSk|=#lgFl!NgmbdcY&z)AM(no@ zC^zb;j=z|N+tYQ*-R?25#cbgMkH*0E1<5NkHy-`Ljo~lX-MD?Hf~P>L7JHf3a)IbD&UJ7xj|S+J)Xfgo#)wIzYRB%*WLJZ!;OD# zxY35@#?f^*N)WY7AERk*EDMM?DoR~<#-(w8axj1-)=x zJygARrU4)02`Ts4Zpfz&;^}UHmcQ?9s{ty7;%iH(9zAO$4MG=F^`U?~EO63FfkVSt zH>4vW?-0G1;XN(n&UhRFF3CIbjz?^!Zr70QD>%pE^q!UReX^6MH41I$GIH7?Z&@NN zc!Q#fi!fq|`%CXNgqM_ z!~N8N+a#>t$H3|FSJIX2ifZurVt zIi_tJtPFi~D0ac3mRY}k&8ZQEK|N5$Eckb?9YLYPLVB(X-bVwZRB$ z0mvtk)``f{`cAXi3(pQa`;p?zsd-5IcoX%p+mqd|$nSfkf`h$2NiH$lw>=1gNlR%y zCX+7n-|hZ0;-b6IZMhpn@S!FQHsB5xzBdk#bRsYytr)}YJd#*X37lxE2}yggM)5_) zqW`>^4d+vFxNt|;C>S4S8@gn2*OE+wk6BMxW^te11YPPD8FnXpGEP*09j z?XAuDP_1Hp1AJsHwWM)x-TaA;ApM?V+0w}4Q5bF|+hI$yrbW8)z1F8tf2sHQlU5@> z^=7DRT`GrNU)BGpvz&h?nrpFleKK08@$r8L2Pig-;+ z5j|*y8014QA9E{lI9IqFp*X8-*0odpxYFmtnPOoj9s-GJI*tj{rS1u0HP>u5gi~;& zYOQ0~hZBX1QmV_AbH9lwiXwrC4y#Nn{3I}WvdEp8Xl@_fQEu;~rE^;sDH4M;F-PjR 
zOe<2g&>6gJ<#~jt>?n6mfgD*84V|7?u|cJL=HvWiZWFGxV^S&wQAox9-iz5&(6MPw zoLA5JWo_YHiIgSp`^%2QWun6V(uCBy>Fn7EqwvfbOclrlYv_$X!-nH3H^TZohgOH& znILASUK)$h{=AkNh7qLb(z|@I3j=@ssqF|_S#^a2dFg;mz$^4X2GBMqH+u*Ft*^}9 zUQQm?PWE&_w!I7>Hn^9UlP}!G+S|$1VK423yxz%S@8ALV^7ip^_@SE_;qAVTTo3!o z&Hkke;o#}x;AZEr)y=xj=Hc$-=DpRm^BX5(tCMw8{aY9Px0%u->3A!c_`&>W2^KJr zi62PE|8+4yCfWdRtvK%@Gd~?K3k<~4LVMG?et}p7HyU)bSL*y=+RIluAOo#OB6N#l zGXYy9RrEBH&~+B?eH&m9%aFuI>n9d0D;UW3Wg%e5bpMkUDo50QFZy>H|@;+vzA!fyYMkLE@rs~Igv$fg>jO$7=wAE^S%@l}* z{%h-})s{duxxU@dEuJI>8czv?#p)?&sV$%Wd*}?}9C+{wLHnnZ8+PiwCvNGokkH@I;>4RfV+f|G-by|o+M)q(ci&Bw(>T13>bI)+ zuhsu^>_6B4TGD^N?C*m9hZHxT82^Uu_n|+k|JQxB`BeJ7?AF6;J?tM!e!FLg-;4ef z_zz__jruP{e+c`BlK#J6-=miYZY9l{#tLp$I3ut z F{0GlC@j(Cp literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/stops.parquet.gzip b/tests/test_data/outputs/stops.parquet.gzip new file mode 100644 index 0000000000000000000000000000000000000000..10a1d132786d559f46e915dc36d64b1d4769af4a GIT binary patch literal 8421 zcmb_ibyQSqyQe!8n4vp`p%EM!DG3ookcI(F$z{kZu@SQo1gp z$8$XEobTSd);E95+VA_kzvuV#n!WcZOFZTVAOH%80a2(Rz#h`|H5vc|0f9RB`s@4W zN@Tc2iyZ;s<$$WL@npgC{euEa@+zkB^4Ag`2UxF%($w5a4hhd`y~PcL?y(riGRM&h zH@y*V8^VE-w2(GEkl{|?4uMiE#y3haamU|Lgpv)#Hz}HMCxUyRRQsBZ_kqQ6j3Btk z%nGvT620g$p(vbJbOpP9iLHK_ydEx8zk=?$#O%3D>N}kwsG_naFPyilbn)m0&fzW zVx_84n!`4pO$SakTGjN>$~K^8YEd+-`Btm4KX=mY5M!cuq%9il+$orf#sqKTo1bKI zCzJLV13$*M=qz)mqJoTX2WvJzPMOzCS#;5xlh#{+>dmX^Ey8Bz9?mRS&dg6Q`cQ8n zynmNoIEspZa2?SZ5Fi)hpQEcAV_rbWeajE|0SyD*P;l%PVdF(uGvw?S5 zwMCvxhejS`BgmYsACXB`tdBXDo@P|G)Qj1c6Ma1w4d(9vs_rN=ltoCHD^nMeYrADr zt~_LTec3J}oPWk!`U+ZIPvFRxuFf@c!TK6MZe6;}mUtMSzbP|+b%Yg`?}Y3gXigNW z71Tmn#Jx=g&T3{gb0QT&UcXQp7x=c1;UhS3^r3>AiK{PS9< zef{pZt#RExfe+@u&7L#TUwZKbN=|&)8?0+8GUq?%45>g`&5ysmzQ2$KJ&<;agmWYpDZ{npY{qB*V4>}R_?+~kR(Eo2kd19*3$B4Z};$Z z@pbi}w8&RYv+|@SskqcI{}|qJe~KKyS(A|1O{hzTmO|=WU=P1Ncc*iAm)mia&H-c2 z-QaqyKZEJdBnI)seuE6zZZ|k-C3h;yy%$ z;3$O%X^p!Rdclt72Jua9vV{O2SUSbbSBv{f_ith zC(DG2xYE8+nLKW0v6l!8iR?7%G9S`lRjbLe8{BU$HIrzdGh*P$B;x@jc{eF=C0gaE 
zD!!&jezv!U7H=5Eg;HQ`)W!YqqnXYf=6ABLXK^Hohsm;qc?BH-Sb0jXKPz-kcuNM0 z$6JjkVdz2Y9(orurQ`!&@@zyr=y)=!0mLhl>T8{o@2bZieUfPvg~>iTo1m7yS-OVD zZk)f$?kIwz?W;TZ2vKuo*tpCn_06z|$06R}MLg3^u^_cmk*fo?T$ktpmrjHWU$bzv zu}h?C1p5+q;M9b-M6nnia%}qDn5DO-=Ixq6GLcW*3RGE8urom45=d*ci*6(9)#J!i zMt;jkMI#IM$t)i7$(MK=d7-=n$*>pb67+bg|XU8sZ zdB%1nlhmSn_!$xECh|J+VH(*IU1Vajkp1cs`**En9O}y#mjcWhA#-=7ETiK{bm`V7 z0*}b`tlsx__F4-`I@LVaB+#OV*Oe}`L+u$0be=4~2qPI{SR~i#UKq&k8F%neMAjcy z-dLlH$5?W~aN(kVUfjPgF!ei2Jso7)gS z)>ZAxEILz}5fsrW6*$%P(%d4ple0(%3GEYN8!n!YIr7m#oZ(Yoy-BgqIbr(xr30KT z(^ubsp6OCl`TiXSz~1;t+-RRlq}m*ascJun6>~Vr``bicTl}cPj7X{g)cAD2V02Kx zaFvx>Khnur%g0l;ZWgm_)B`(Wm2pMG9?bc;DheHb&oSDP@GTAfQfKESl)b-#w-u{7 zUL?@%>7Z%WvdT$kCVovQu1-k7J!3uB4-+$WhACgzyI3BwDxnj-l)UsHV`)vzttZdj znfa8#BXdGiple?OE9u=xkKoQU*nVu|1A6*w;2`*|r^-k{sV|O`zA0Xvyi2kLStpoz z!(ExyDd=tI+h(UVth6CYMFp~|X!XH+wiT&Q(0zH|m~eOu z0)SGjhwb2~tY#wLPdwPBoiB5@#HpRn(-KRCWYcE5nTjoNzqJ&!WvS|zOMm?Ev43z5 zIRJkePA`Mkux-LC0w*tR_|n_cRGl0!;8P$HbNE2fKsU|5HYRVWL^Fr~OT7FoxxE}` zs{6Qx2YP%JSWkv%jsOlTX40^vBGM>mR;pT*kve|x^C#yV1z#+d0S(ULL4#$2@*6{> zA49+Fu)rGGcW4G9scyFh$wB2jLQfoDtDw854dNziWv;lC=q~hfnZjBM--atIz3u3z z_ZIYiLEI<(ma@q)aF6R*GH`$m8L4-QR{sc`ONd-crqflsCB2!@Qi!fA{$X5Ea!A+F z78Y=Y{rM8ZL0@v?aMDp}&2I8hr0S%SNrxJS0rR`k@~Xq}n4yC>p-5LIkJBwX2BzrL z3bt%~$Y8y&Y&D2G{84|L93D2E*quXm|7fD5u%w-QfrFqbzxvP7H80_av-NBX)`krO z3Hjw)7MJ#*wed)TgSfJXLR3tc^SdIJ?~u>qmV$4GuE9m^CiXx+PxDg!c<7gM<4lLk zbQP~kjRd9QmXYqnOuj3hgorJvE0Ke(pOlVXUf|czN#%^;+)Ux?-@tjmf1bkZsxCUc zniL+zZtY^5(PpzVT8p|KT~c!btfj zpi8vW(FH$aZ}Lgc#fp31h5PnV%68p~*()i!w*@A3CXszH6)4=KXqtu*SeIKysKmN} z?nusRj#1TgL$5b6c79mWR-v?rr>u%4JwQfWmHD{3eem0y`>rdJgx)FJ(WtdMu99ga z>CPFKaRv*OE$OEA7@*8c%e#E3TODH=bYG+(ouPcXeZS+-w1qV-Om;uTGML1Xx; z^Ptzmr$Y|9+e>o`y}x^mKC5~r&{~w8*5*+}lwi#5dsAFxNN`X4q|G|L+dWZH)a)Oa zx!`DRWXfCQQG;x*vU7Ib9R^ms5`AT+ZYHW8A`z|=XETJYec1`dG8^(+2b!AE1WCun z6+*c|1{1CI?iXlCC;5nu-7-j7b>)|9MEZpTSV^i1$~#5@wUINkdSBg~VPJ_dfv_f5 z7|QZ~!Zc69YSM1MWpxlK3R<5XcA+ti<~{5D&AZ4Q3bYrYy*To?XL-+Aw)(h-);A62 
zJoXwjJ#c}qXA60@hD{*`kswvA;?>6>M^=95fPuB9oiRnQwXA?iA^F3q4|+@Ev>+dC z060Mqq~*vs;yfTY*%K`kstB1sw|s82r3gGAFO+KMfN(ob=(dHzN(;+R`}m!Ja!Os@ zu0hqFx3w?6YOb1+%Rgt=@FR%@;Jx(_*^_*4XbH4zl1s+jl1?RdgSQc zq_zbtw0#CD{1`b*r3MEt9b!3y!~yqohLpLwO}he1EN^>X$>XnlKH=Um0ca}(M7qc<2@M*aQ@xy}C&e&w-W1W<#4`XBfPDHaVi)yoJKQI{t zk=@TquiCQ3|&Vsw8sqyCW`&)H5qP5+qGUc{;08ToXj7z-8frRUf5 z%UxY#`V@H&5Zh?v=wq+fnAxOCW_na(Py9y)&Zqje!NW4JLWPmBBGxx^@a3loS%a7k zz*n)dnFM1UlsPd|G%PslYD4d~0abgg<(xyPQYSJqTY3z0N9H`LyJ(1OmaZ8ekx_?- z`#r()X)n^hf_ZSQ5ff*oyVvcA=hnDb1`#f@$m5Z_cHbRW#ETs)v^cElgGl7VU8iaA$ZO=$h|ugF9O!Ya z4dLNA29!&!R@&Wzmw--NrSQm11eWoK<_%UoclWvqLCx*xg%!#rpRF!nI_9Xs)w?GT z?YE1Rb<6p%4Z(Y7In3r) zDpPgM?9ai*BXUf}r}l9+@P4b{RB)?5nnLWUjdRVJp27|gZ)SS3B6(5@23U{FOFziS zM^45WwGUqcIW)|5&09fzF6-$j@Y+jTi3d*JMiMQYX*%eQ!m<4O3Mc1As*9pq)>!Ii z^^*f?(k0;}$W(~@0@5$bg};?i1`h&vcFr>lG&~T(mbe-&@B)RaAGuLbTQR(NYCrSE zf=OI(^Zfn^2(jm6{1cTkCP_+B=6;qf5|j3&8*kWvi=}$sw{sKU!#3)bJ?F~dzI+Ek z{DG?tYE%K)w+^L2G;6{W1L*t#U^(NE;-sq3txEN zRk2QO9128VRT^7$Nq62B;Dc2vzQf z%B(v-N5`6bG$}nk_-b@QXPX&qv;b!W1ksCGk&QSar@{^D@p`FSC+D;*ZK&8&Oq9~qd~J7_)}LEyZr zT&`x_aJ6bYw-0uHn!uIQhq+-&M0pTci-Ymnn*(}(Vl=T3T#<@mpDJzvaE zWz7(u_bW}N2h~+3k)woVX3Do)ch0tAy{Y!AO){cN!H1kbb-wjpod7VrOI1RcI$J4S z%6i8cC053gS14^eNzRyhQ!e~rX`9n7RED>CE{AC7+XAjp64DXydBTKSmEJG_c<40% zyJdnJ(Ap`1AH0@l$CiM#B?@byC=tY>OP=egy@X?Eyk8_MChW7z{Sp8&4@%b4nLu3> zfHinxK9WcI7JedYVSDl*uDoAsQ4Bj_!sF{FeK_|BpFXvi;-2Tez)nDfM`Q>3yaaEO z@vtRXbG!H~-snVwf%S2m3B}U79DKaFYce{LN_kU9p_tgAt1Gznb z5A&1r`kbi_QeeP$z>QaaVi`rOE~SuI(HV4H?bS>l{KBK}Nb=X6&=v{$)oU>MRE zmHD=Q^bQwK%Mor(SbP%!#p12wiMy72O!4}rPwgPhi>Qoj*|#r(I{d|0nb&hAJm7(C z+7{n1!9_c5mDu(JahVgiJ0{^C%y(J=nQBM5R4;4p;yC5Kdo(jv!~8{JaK?!?AW`Qt z{)gFlgE#d(R35ej>v_ReMph)T7s}NKw*twBKMpWm96M$`L>Ol0$~}!6hAW(}NuH`O zD9;lJ8|)Nvu8r;If7!ZwCZ-e-Ds5Q2{Ut`L;s}+g4m6}#sDp$Hlv|(?s)$_i7 zr+C<w#rwbZRrvk1%gG#?4H9+U<~vG^)umY6{YeL23- zxT_@;;&9{YYG}m}z~7=je)8c%1<$%Wd%@MR(Q;C}*K&`d5?#b`M6%k|bIpbArwMpr z?0z-zEyL~_?=F(>wjUKaa)T6?F70J%yy1BZGy+`>86syw6mXAF`5oi3> 
zE9dBQW5=oB-8>t1+38pY?dinFDa(wzJR@sYn-m+zo#2#t$aeZnomf29N@>4shLepw z?f!*B(Pv@(;}H?}elYN4n{`9eC>bi;MtEo&|8?_m?Bu$dC;MK79;`nh*-iCcQx)3? zqaK}Cid*3a>^GEs``%~MfJ3K%M7Nec(Ou8oj+ry@V{20+=>vt-u`c4)uu*} z)Wag@Gn;!*_kQK&^N*JT=9C#Dwy#_k7?I;o@@r{1XO9mKM@#knz4sE+!cXK&3WDQI1xoyfj^6u+H*a^>ZPpsV>_^UuQA{FjG)YD01lqE(2{ zcT6q)uypvo%6A4eO1-7CEpKA^=RMyoV1oGu}+(JcB_X%0Us z5dM}w3@hvz_eZ!H*iXAgELEwNK_#GS^xI2sNXh#;%-OO}C$Lp?r zdu9cymz=$Ewgt973Bk`l+7N4F3f1I<1bD0s?tszr`?ImU)B>E;{n^xChAruwWD7UO zB%JiFzEt;$9Ay?v={Wt`|Y(@8_7?7!L~07H9InrI_lq_Rd${1 znOkb~)!v0%tnbvi!TLR(&DkBmwp8>U9$qBe@d}Ua!aQ>u7R$*xmcI~4)Wa3JA|FN1Yn!HekK(Fg1p~rHX4kgM zuM$=OZkMgwVypUDfSXD{B29XLW^ZH$wS)j}AplSSFAxDpB&gi0F~&DzM?82bB;6~yvp2kVCljGJQ?)>a6B-*+Gmzi1GzDSk2{x*!1e zkVq*=Lzplq04SsY1k#^Y08p+4z!73#Eh;Gj5JQ**LlE)D+aZ#lZ;22PfCvbr6xYu( z5dcWnBBoYU3INWHia(71p~RmZIEZnrq!m?||93Pk>{on#`8WI>5)Qg{u*K&L5kF+Ffdu2jDt@o9 z%W=cs99+8pH#HE4Uuq@@uGK8vzgFX`$YTdIloKbxh#tl?qf?d>|C^e6wLe0Q===*` z@;yTJYS;K&B=nm=lVB7jq1{~l4ZoxN3-9s^AN3u-qkE0N- zUg!2YbtGsu5G!MdJxMI48=aB01b`{qGUJUR%K|EI>4L zmQV+XF~kA#=g#kPE10E?i@t-4&36o)wc#_UkptZ|?g!N`fu*@K)R^7e$^mK$wWIUU zy*}~7f`6i>yRquKmVagc^T_W8Tr=?V{R97xeGd@L-@K&z*?0e$>@SCYr~6lbVOHkf zo0sn27;l9C(ZOE>;Ja!6CDxnN{qL-Q9Q!Y^UXQ2$LwJ9l`km9n!W#0=3H?LzZvnP| z+?3(J7toK%`Qy}ofzTdq|`)YoF^!~=+C-c)j45$HORu)#XN{?>jWnf@oIgrjGyy~csRIzu9 z+oM^@3{GAdyU+fUc|Iqc(R0z_K!h;~IE+~cG=_tDL!+vr!ngk`IcCK83Lju8Ecu(e zEK*GLz_BKT2{l+vFok%^^`JjzmxHvBY#@_W>4nxKcEytVnmc5_%H*Atv#BgRS@!hY zeb#K*DkrdcOp-AkY{khLskw>3AQEK}ZD15-l3>luOG&MOb2*CgOG{GYGgIIqT*awH zWtqt+V(cYFnFSz4VnUKKl0Ir2GNLS!q70Hy(?nUsgv2f|ip^k@WDw)9k>H2}#{~m} z1P3FOCC0!YDFX~^22m!Uh}r^1mIwqJYNREb? 
z(*iPQ9TU(Tpe`_w;NXU`(9GGy3^NDjQJ5hMK!$8$1{zYL0W;*O8ia*vh}Z!(F(IHI zCD;lQ^HLIvCGN3SD<$XWl;-BegM&P^SjkEux>_kMGc_kAJ})sh70Ayk&B@VGP|8g$ zNlZyBNkoXC%IB5l7F5QUR2G2rDCH+*r6!js0rf$wf=lFPR-~rrX6BWorl%Gu)y4u< zKwN;$Y9+9%K&Ifd9iar(0YDqfOt9Dw5wF!zK=Y*%ERo^24Otb179!Fxek)PcQD`YN zli;@zNfAlbfg`ghHL)bW2$%+{m2xtZiV}+|LFpkavAColv8b>#6`Ux_Qj3Z+^YcJ5 zMg~U4dWHsihDyLlfy8D!Qi_1aFUV+UNZk iEXl~vQ-B(v0Jm8|BdIhqCq= Date: Mon, 11 Dec 2023 15:20:47 +0000 Subject: [PATCH 12/30] cli --- gtfs_skims/cli.py | 7 +++++-- gtfs_skims/connectors.py | 27 ++++++++++++++++--------- tests/__init__.py | 0 tests/test_cli.py | 25 +++++++++++++++++++++++ tests/test_connectors.py | 21 +++++++++++++++++++ tests/test_data/centroids.csv | 38 +++++++++++++++++------------------ 6 files changed, 87 insertions(+), 31 deletions(-) create mode 100644 tests/__init__.py diff --git a/gtfs_skims/cli.py b/gtfs_skims/cli.py index 328bd9e..110072f 100755 --- a/gtfs_skims/cli.py +++ b/gtfs_skims/cli.py @@ -1,7 +1,7 @@ """Console script for gtfs_skims.""" import click - +from typing import Optional from gtfs_skims.preprocessing import main as main_preprocessing from gtfs_skims.connectors import main as main_connectors @@ -20,8 +20,11 @@ def cli(args=None): @cli.command() @click.argument('config_path') -def run(config_path: str): +@click.option("--output_directory_override", default=None, help="override output directory") +def run(config_path: str, output_directory_override: Optional[str] = None): config = Config.from_yaml(config_path) + if output_directory_override is not None: + config.path_outputs = output_directory_override steps = config.steps if 'preprocessing' in steps: diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index 8d15bfd..b573129 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -315,20 +315,24 @@ def get_egress_connectors(data: GTFSData, config: Config, coords_destinations: n return arr -def main(data: GTFSData, config: Config) -> tuple[TransferConnectors, 
AccessEgressConnectors, AccessEgressConnectors]: +def main(config: Config, data: Optional[GTFSData] = None) -> tuple[TransferConnectors, AccessEgressConnectors, AccessEgressConnectors]: logger = get_logger(os.path.join( config.path_outputs, 'log_connectors.log')) - coords_origins = pd.read_csv(config.path_origins) - coords_destinations = pd.read_csv(config.path_destinations) + + if data is None: + data = GTFSData.from_parquet(config.path_outputs) + coords_origins = pd.read_csv(config.path_origins, index_col=0) + coords_destinations = pd.read_csv(config.path_destinations, index_col=0) # get feasible connections logger.info('Getting transfer connectors...') transfer_connectors = get_transfer_connectors(data, config) logger.info('Getting access connectors...') - access_connectors = get_access_connectors(data, config, coords_origins) + access_connectors = get_access_connectors( + data, config, coords_origins.assign(z=config.start_s).values) logger.info('Getting egress connectors...') egress_connectors = get_egress_connectors( - data, config, coords_destinations) + data, config, coords_destinations.values) # convert to dataframe colnames = ['onode', 'dnode', 'walk', 'wait'] @@ -337,19 +341,22 @@ def main(data: GTFSData, config: Config) -> tuple[TransferConnectors, AccessEgre egress_connectors = pd.DataFrame(egress_connectors, columns=colnames) # offset IDs for endpoints - access_connectors['onode'] += (len(data.stop_times)+1) - egress_connectors['dnode'] += (len(data.stop_times)+len(coords_origins)+2) + access_connectors['onode'] += len(data.stop_times) + egress_connectors['dnode'] += (len(data.stop_times)+len(coords_origins)) # save logger.info(f'Saving connectors to f{config.path_outputs}...') transfer_connectors.to_parquet( - os.path.join(config.path_outputs, 'connectors_transfer.parquet') + os.path.join(config.path_outputs, 'connectors_transfer.parquet.gzip'), + compression='gzip' ) access_connectors.to_parquet( - os.path.join(config.path_outputs, 
'connectors_access.parquet') + os.path.join(config.path_outputs, 'connectors_access.parquet.gzip'), + compression='gzip' ) egress_connectors.to_parquet( - os.path.join(config.path_outputs, 'connectors_egress.parquet') + os.path.join(config.path_outputs, 'connectors_egress.parquet.gzip'), + compression='gzip' ) return transfer_connectors, access_connectors, egress_connectors diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_cli.py b/tests/test_cli.py index f5a59d0..a8540fd 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,9 +1,13 @@ """Tests for `gtfs_skims` CLI.""" +import os +from pathlib import Path from click.testing import CliRunner from gtfs_skims import cli +TEST_DATA_DIR = os.path.join(Path(__file__).parent, 'test_data') + def test_command_line_interface(): """Test the CLI.""" @@ -18,3 +22,24 @@ def test_command_line_interface(): "--help Show this message and exit.\n" in help_result.output ) + + +def test_run_steps_saves_outputs(tmpdir): + runner = CliRunner() + result = runner.invoke( + cli.cli, + ['run', os.path.join(TEST_DATA_DIR, 'config_demo.yaml'), + '--output_directory_override', tmpdir] + ) + + assert result.exit_code == 0 + + for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + assert os.path.exists( + os.path.join(tmpdir, f'{x}.parquet.gzip') + ) + + for x in ['transfer', 'access', 'egress']: + assert os.path.exists( + os.path.join(tmpdir, f'connectors_{x}.parquet.gzip') + ) \ No newline at end of file diff --git a/tests/test_connectors.py b/tests/test_connectors.py index 2922397..7f0c0da 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -163,3 +163,24 @@ def test_convert_distance_3d(): max_tranfer_distance=1 ) assert len(egress.ods) == 1 # radius has been adjusted to 3D space + + +def test_apply_crow_fly_factoring(): + pass + + +def test_access_indices_are_offset(): + pass + + +def test_egress_indices_are_offset(): + pass + + +def 
test_main_saves_outputs(config, gtfs_data_preprocessed, tmpdir): + config.path_outputs = tmpdir + connectors.main(config=config, data=gtfs_data_preprocessed) + for x in ['transfer', 'access', 'egress']: + assert os.path.exists( + os.path.join(tmpdir, f'connectors_{x}.parquet.gzip') + ) \ No newline at end of file diff --git a/tests/test_data/centroids.csv b/tests/test_data/centroids.csv index 5fbb1e0..3e3b3ac 100644 --- a/tests/test_data/centroids.csv +++ b/tests/test_data/centroids.csv @@ -1,19 +1,19 @@ -name,longitude,latitude -E02003587,-1.155884355526687,50.72185610082279 -E02003586,-1.173878456845198,50.72301086140903 -E02003585,-1.224069337360162,50.72613466907799 -E02003584,-1.158508858308219,50.72873880022016 -E02003583,-1.28149160450734,50.75184512743099 -E02003582,-1.31006711916123,50.75340385923516 -E02003581,-1.297514083246226,50.75714214474556 -E02003589,-1.302344772901295,50.70267302231341 -E02003588,-1.284284792950486,50.703256856445805 -E02003597,-1.283759136860344,50.61568691131167 -E02003596,-1.175831901564544,50.63446808149097 -E02003595,-1.180359149209377,50.64417605386147 -E02003594,-1.154986228595743,50.659399194734654 -E02003593,-1.386537290710913,50.68165562633486 -E02003592,-1.526357692381475,50.68396619141156 -E02003591,-1.30109588996732,50.69411141243501 -E02003590,-1.096598389236477,50.69424449742397 -E02003598,-1.210809598549173,50.59781051582961 +name,x,y +E02003587,459682,91699 +E02003586,458411,91813 +E02003585,454864,92122 +E02003584,459488,92462 +E02003583,450783,94940 +E02003582,448766,95094 +E02003581,449648,95518 +E02003589,449364,89458 +E02003588,450639,89535 +E02003597,450770,79798 +E02003596,458382,81965 +E02003595,458050,83041 +E02003594,459825,84754 +E02003593,443438,87068 +E02003592,433558,87253 +E02003591,449461,88507 +E02003590,463905,88678 +E02003598,455952,77862 From 26a3f678ad05539d12c801425feead6213957590 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Mon, 11 Dec 2023 17:15:11 +0000 Subject: [PATCH 
13/30] crows fly distance --- gtfs_skims/connectors.py | 58 +++++++++++++++++++++++----------------- tests/test_connectors.py | 38 ++++++++++++++++++-------- 2 files changed, 61 insertions(+), 35 deletions(-) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index b573129..cc8a94a 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -35,9 +35,9 @@ def query_pairs(coords: np.ndarray, radius: float) -> np.array: class TransferConnectors: """ Manages transfer connectors. """ - def __init__(self, coords: np.ndarray, max_tranfer_distance: float) -> None: + def __init__(self, coords: np.ndarray, max_transfer_distance: float) -> None: self.coords = coords - radius = max_tranfer_distance * (2**0.5) + radius = max_transfer_distance * (2**0.5) self.ods = query_pairs(coords, radius=radius) @cached_property @@ -198,14 +198,14 @@ def __init__( self, coords_origins: np.ndarray, coords_destinations: np.ndarray, - max_tranfer_distance: float + max_transfer_distance: float ) -> None: self.coords_origins = coords_origins self.coords_destinations = coords_destinations - radius = max_tranfer_distance + radius = max_transfer_distance if coords_origins.shape[1] == 3: - radius += max_tranfer_distance * (2**0.5) + radius += max_transfer_distance * (2**0.5) self.ods = query_pairs_od(coords_origins, coords_destinations, radius=radius) @@ -231,23 +231,25 @@ def dcoords(self) -> np.array: def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: time_to_distance = config.walk_speed/3.6 # km/hr to meters - max_tranfer_distance = config.max_transfer_time * time_to_distance + max_transfer_distance = config.max_transfer_time * time_to_distance max_wait_distance = config.max_wait * time_to_distance # get candidate connectors coords = data.stop_times[['x', 'y', 'departure_s']].values - tc = TransferConnectors(coords, max_tranfer_distance) + coords[:, :2] = coords[:, :2] * \ + config.crows_fly_factor # crow's fly transformation + tc = 
TransferConnectors(coords, max_transfer_distance) # apply more narrow filters: # enough time to make transfer - tc.filter_feasible_transfer(max_tranfer_distance) + tc.filter_feasible_transfer(max_transfer_distance) # maximum walk - if config.walk_distance_threshold < max_tranfer_distance: + if config.walk_distance_threshold < max_transfer_distance: tc.filter_max_walk(config.walk_distance_threshold) # maximum wait - if max_wait_distance < max_tranfer_distance: + if max_wait_distance < max_transfer_distance: tc.filter_max_wait(max_wait_distance) # not same route @@ -272,21 +274,26 @@ def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: return arr -def get_access_connectors(data: GTFSData, config: Config, coords_origins: np.ndarray): +def get_access_connectors(data: GTFSData, config: Config, origins: pd.DataFrame): time_to_distance = config.walk_speed/3.6 # km/hr to meters - max_tranfer_distance = config.max_transfer_time * time_to_distance + max_transfer_distance = config.max_transfer_time * time_to_distance max_wait_distance = config.max_wait * time_to_distance # get candidate connectors coords_stops = data.stop_times[['x', 'y', 'departure_s']].values + coords_stops[:, :2] = coords_stops[:, :2] * \ + config.crows_fly_factor # crow's fly transformation + coords_origins = (origins[['x', 'y']] * config.crows_fly_factor).\ + assign(z=config.start_s).values + ac = AccessEgressConnectors( - coords_origins, coords_stops, max_tranfer_distance) + coords_origins, coords_stops, max_transfer_distance) # more narrow filtering - ac.filter_feasible_transfer(max_tranfer_distance) - if config.walk_distance_threshold < max_tranfer_distance: + ac.filter_feasible_transfer(max_transfer_distance) + if config.walk_distance_threshold < max_transfer_distance: ac.filter_max_walk(config.walk_distance_threshold) - if max_wait_distance < max_tranfer_distance: + if max_wait_distance < max_transfer_distance: ac.filter_max_wait(max_wait_distance) arr = np.concatenate([ @@ 
-298,11 +305,16 @@ def get_access_connectors(data: GTFSData, config: Config, coords_origins: np.nda return arr -def get_egress_connectors(data: GTFSData, config: Config, coords_destinations: np.ndarray): +def get_egress_connectors(data: GTFSData, config: Config, destinations: pd.DataFrame): time_to_distance = config.walk_speed/3.6 # km/hr to meters # get candidate connectors coords_stops = data.stop_times[['x', 'y']].values + coords_stops[:, :2] = coords_stops[:, :2] * \ + config.crows_fly_factor # crow's fly transformation + coords_destinations = ( + destinations[['x', 'y']] * config.crows_fly_factor).values + ec = AccessEgressConnectors( coords_stops, coords_destinations, config.walk_distance_threshold) @@ -321,18 +333,16 @@ def main(config: Config, data: Optional[GTFSData] = None) -> tuple[TransferConne if data is None: data = GTFSData.from_parquet(config.path_outputs) - coords_origins = pd.read_csv(config.path_origins, index_col=0) - coords_destinations = pd.read_csv(config.path_destinations, index_col=0) + origins = pd.read_csv(config.path_origins, index_col=0) + destinations = pd.read_csv(config.path_destinations, index_col=0) # get feasible connections logger.info('Getting transfer connectors...') transfer_connectors = get_transfer_connectors(data, config) logger.info('Getting access connectors...') - access_connectors = get_access_connectors( - data, config, coords_origins.assign(z=config.start_s).values) + access_connectors = get_access_connectors(data, config, origins) logger.info('Getting egress connectors...') - egress_connectors = get_egress_connectors( - data, config, coords_destinations.values) + egress_connectors = get_egress_connectors(data, config, destinations) # convert to dataframe colnames = ['onode', 'dnode', 'walk', 'wait'] @@ -342,7 +352,7 @@ def main(config: Config, data: Optional[GTFSData] = None) -> tuple[TransferConne # offset IDs for endpoints access_connectors['onode'] += len(data.stop_times) - egress_connectors['dnode'] += 
(len(data.stop_times)+len(coords_origins)) + egress_connectors['dnode'] += (len(data.stop_times)+len(origins)) # save logger.info(f'Saving connectors to f{config.path_outputs}...') diff --git a/tests/test_connectors.py b/tests/test_connectors.py index 7f0c0da..84abbd3 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -147,7 +147,7 @@ def test_get_od_walk(): egress = connectors.AccessEgressConnectors( np.array([[0, 0], [1, 1]]), np.array([[0.5, 0.5], [2, 1], [2, 2]]), - max_tranfer_distance=1 + max_transfer_distance=1 ) walk = egress.walk expected = np.array([ @@ -157,24 +157,40 @@ def test_get_od_walk(): def test_convert_distance_3d(): - egress = connectors.AccessEgressConnectors( + access = connectors.AccessEgressConnectors( np.array([[0, 0, 0]]), np.array([[1, 1, 1]]), - max_tranfer_distance=1 + max_transfer_distance=1 ) - assert len(egress.ods) == 1 # radius has been adjusted to 3D space + assert len(access.ods) == 1 # radius has been adjusted to 3D space -def test_apply_crow_fly_factoring(): - pass +def test_apply_crow_fly_factoring(gtfs_data_preprocessed, config): + arr = connectors.get_transfer_connectors(gtfs_data_preprocessed, config) + assert len(arr) == 2 + max_walk = arr[:, 3].max() + config.walk_distance_threshold = max_walk + config.crows_fly_factor = 1 + arr = connectors.get_transfer_connectors(gtfs_data_preprocessed, config) + assert len(arr) == 2 -def test_access_indices_are_offset(): - pass + # after adding the crow's fly factor, the destination is further than the max distance + config.crows_fly_factor = 1.05 + arr = connectors.get_transfer_connectors(gtfs_data_preprocessed, config) + assert len(arr) < 2 -def test_egress_indices_are_offset(): - pass +def test_indices_are_offset(config, gtfs_data_preprocessed, tmpdir): + config.path_outputs = tmpdir + transfer_connectors, access_connectors, egress_connectors = \ + connectors.main(config=config, data=gtfs_data_preprocessed) + stop_time_ids = 
list(range(len(gtfs_data_preprocessed.stop_times))) + assert all(np.isin(access_connectors['dnode'], stop_time_ids)) + assert all(np.isin(egress_connectors['onode'], stop_time_ids)) + assert np.isin(access_connectors['onode'], stop_time_ids).sum() == 0 + assert np.isin(egress_connectors['dnode'], stop_time_ids).sum() == 0 + assert access_connectors['onode'].max() < egress_connectors['dnode'].min() def test_main_saves_outputs(config, gtfs_data_preprocessed, tmpdir): @@ -183,4 +199,4 @@ def test_main_saves_outputs(config, gtfs_data_preprocessed, tmpdir): for x in ['transfer', 'access', 'egress']: assert os.path.exists( os.path.join(tmpdir, f'connectors_{x}.parquet.gzip') - ) \ No newline at end of file + ) From c5061620a93ce7d76a83de690183859b1caf1e85 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Mon, 11 Dec 2023 20:37:45 +0000 Subject: [PATCH 14/30] connectors dataclass --- .gitignore | 1 + gtfs_skims/connectors.py | 40 ++++++-------- gtfs_skims/graph.py | 6 +++ gtfs_skims/utils.py | 92 +++++++++++++++++++++++++++----- tests/test_connectors.py | 13 +++-- tests/test_data/config_demo.yaml | 7 ++- tests/test_graph.py | 5 ++ 7 files changed, 120 insertions(+), 44 deletions(-) create mode 100644 tests/test_graph.py diff --git a/.gitignore b/.gitignore index 5f9b874..56c75cc 100755 --- a/.gitignore +++ b/.gitignore @@ -40,4 +40,5 @@ mike-*.yml .ipynb_checkpoints sandbox.py +tests/test_data/outputs/ tests/test_data/outputs/*log \ No newline at end of file diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index cc8a94a..f2c6d9e 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -8,7 +8,7 @@ from scipy.spatial import KDTree import pandas as pd -from gtfs_skims.utils import Config, GTFSData, get_logger +from gtfs_skims.utils import Config, GTFSData, ConnectorsData, get_logger from gtfs_skims.variables import DATA_TYPE @@ -327,7 +327,7 @@ def get_egress_connectors(data: GTFSData, config: Config, destinations: pd.DataF return 
arr -def main(config: Config, data: Optional[GTFSData] = None) -> tuple[TransferConnectors, AccessEgressConnectors, AccessEgressConnectors]: +def main(config: Config, data: Optional[GTFSData] = None) -> ConnectorsData: logger = get_logger(os.path.join( config.path_outputs, 'log_connectors.log')) @@ -338,35 +338,29 @@ def main(config: Config, data: Optional[GTFSData] = None) -> tuple[TransferConne # get feasible connections logger.info('Getting transfer connectors...') - transfer_connectors = get_transfer_connectors(data, config) + connectors_transfer = get_transfer_connectors(data, config) logger.info('Getting access connectors...') - access_connectors = get_access_connectors(data, config, origins) + connectors_access = get_access_connectors(data, config, origins) logger.info('Getting egress connectors...') - egress_connectors = get_egress_connectors(data, config, destinations) + connectors_egress = get_egress_connectors(data, config, destinations) # convert to dataframe colnames = ['onode', 'dnode', 'walk', 'wait'] - transfer_connectors = pd.DataFrame(transfer_connectors, columns=colnames) - access_connectors = pd.DataFrame(access_connectors, columns=colnames) - egress_connectors = pd.DataFrame(egress_connectors, columns=colnames) + connectors_transfer = pd.DataFrame(connectors_transfer, columns=colnames) + connectors_access = pd.DataFrame(connectors_access, columns=colnames) + connectors_egress = pd.DataFrame(connectors_egress, columns=colnames) # offset IDs for endpoints - access_connectors['onode'] += len(data.stop_times) - egress_connectors['dnode'] += (len(data.stop_times)+len(origins)) + connectors_access['onode'] += len(data.stop_times) + connectors_egress['dnode'] += (len(data.stop_times)+len(origins)) # save - logger.info(f'Saving connectors to f{config.path_outputs}...') - transfer_connectors.to_parquet( - os.path.join(config.path_outputs, 'connectors_transfer.parquet.gzip'), - compression='gzip' - ) - access_connectors.to_parquet( - 
os.path.join(config.path_outputs, 'connectors_access.parquet.gzip'), - compression='gzip' - ) - egress_connectors.to_parquet( - os.path.join(config.path_outputs, 'connectors_egress.parquet.gzip'), - compression='gzip' + logger.info(f'Saving connectors to {config.path_outputs}...') + connectors = ConnectorsData( + connectors_transfer=connectors_transfer, + connectors_access=connectors_access, + connectors_egress=connectors_egress, ) + connectors.save(config.path_outputs) - return transfer_connectors, access_connectors, egress_connectors + return connectors diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index e69de29..bec47c2 100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -0,0 +1,6 @@ +import os + +from graph_tool import Graph +from graph_tool.topology import shortest_distance + +from gtfs_skims.utils import Config, GTFSData, get_logger \ No newline at end of file diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py index 3482a17..0f796c1 100644 --- a/gtfs_skims/utils.py +++ b/gtfs_skims/utils.py @@ -4,7 +4,7 @@ import logging import os from pathlib import Path -from typing import Optional +from typing import Optional, Self import yaml from zipfile import ZipFile @@ -118,6 +118,9 @@ class Config: start_s: int walk_distance_threshold: int walk_speed: float + weight_walk: float + weight_wait: float + penalty_interchange: float steps: list @classmethod @@ -146,16 +149,65 @@ def __repr__(self) -> str: return s -@dataclass -class GTFSData: - calendar: pd.DataFrame - routes: pd.DataFrame - stops: pd.DataFrame - stop_times: pd.DataFrame - trips: pd.DataFrame +# @dataclass +# class GTFSData: +# calendar: pd.DataFrame +# routes: pd.DataFrame +# stops: pd.DataFrame +# stop_times: pd.DataFrame +# trips: pd.DataFrame + +# @classmethod +# def from_gtfs(cls, path_gtfs: str) -> GTFSData: +# """Load GTFS tables from a standard zipped GTFS file. + +# Args: +# path_gtfs (str): Path to a zipped GTFS dataset. + +# Returns: +# GTFSData: GTFS data object. 
+# """ +# data = {} +# with ZipFile(path_gtfs, 'r') as zf: +# for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: +# with zf.open(f'{name}.txt') as f: +# data[name] = pd.read_csv(f, low_memory=False) +# return cls(**data) + +# @classmethod +# def from_parquet(cls, path: str) -> GTFSData: +# """Construct class from pre-processed GTFS tables in Parquet format. + +# Args: +# path (str): Path to tables. + +# Returns: +# GTFSData: GTFS data object. +# """ +# data = {} +# for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: +# data[name] = pd.read_parquet( +# os.path.join(path, f'{name}.parquet.gzip')) +# return cls(**data) + +# def save(self, path_outputs: str) -> None: +# """Export all tables in zipped parquet format. + +# Args: +# path_outputs (str): Directory to save outputs. +# """ +# if not os.path.exists(path_outputs): +# os.makedirs(path_outputs) + +# for k, v in self.__dict__.items(): +# v.to_parquet(os.path.join( +# path_outputs, f'{k}.parquet.gzip'), compression='gzip') + +@dataclass +class Data: @classmethod - def from_gtfs(cls, path_gtfs: str) -> GTFSData: + def from_gtfs(cls, path_gtfs: str) -> Self: """Load GTFS tables from a standard zipped GTFS file. Args: @@ -166,13 +218,13 @@ def from_gtfs(cls, path_gtfs: str) -> GTFSData: """ data = {} with ZipFile(path_gtfs, 'r') as zf: - for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + for name in cls.__annotations__.keys(): with zf.open(f'{name}.txt') as f: data[name] = pd.read_csv(f, low_memory=False) return cls(**data) @classmethod - def from_parquet(cls, path: str) -> GTFSData: + def from_parquet(cls, path: str) -> Self: """Construct class from pre-processed GTFS tables in Parquet format. Args: @@ -182,7 +234,7 @@ def from_parquet(cls, path: str) -> GTFSData: GTFSData: GTFS data object. 
""" data = {} - for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + for name in cls.__annotations__.keys(): data[name] = pd.read_parquet( os.path.join(path, f'{name}.parquet.gzip')) return cls(**data) @@ -199,3 +251,19 @@ def save(self, path_outputs: str) -> None: for k, v in self.__dict__.items(): v.to_parquet(os.path.join( path_outputs, f'{k}.parquet.gzip'), compression='gzip') + + +@dataclass +class GTFSData(Data): + calendar: pd.DataFrame + routes: pd.DataFrame + stops: pd.DataFrame + stop_times: pd.DataFrame + trips: pd.DataFrame + + +@dataclass +class ConnectorsData(Data): + connectors_transfer: pd.DataFrame + connectors_access: pd.DataFrame + connectors_egress: pd.DataFrame diff --git a/tests/test_connectors.py b/tests/test_connectors.py index 84abbd3..f47fe9a 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -183,14 +183,13 @@ def test_apply_crow_fly_factoring(gtfs_data_preprocessed, config): def test_indices_are_offset(config, gtfs_data_preprocessed, tmpdir): config.path_outputs = tmpdir - transfer_connectors, access_connectors, egress_connectors = \ - connectors.main(config=config, data=gtfs_data_preprocessed) + conn = connectors.main(config=config, data=gtfs_data_preprocessed) stop_time_ids = list(range(len(gtfs_data_preprocessed.stop_times))) - assert all(np.isin(access_connectors['dnode'], stop_time_ids)) - assert all(np.isin(egress_connectors['onode'], stop_time_ids)) - assert np.isin(access_connectors['onode'], stop_time_ids).sum() == 0 - assert np.isin(egress_connectors['dnode'], stop_time_ids).sum() == 0 - assert access_connectors['onode'].max() < egress_connectors['dnode'].min() + assert all(np.isin(conn.connectors_access['dnode'], stop_time_ids)) + assert all(np.isin(conn.connectors_egress['onode'], stop_time_ids)) + assert np.isin(conn.connectors_access['onode'], stop_time_ids).sum() == 0 + assert np.isin(conn.connectors_egress['dnode'], stop_time_ids).sum() == 0 + assert 
conn.connectors_access['onode'].max() < conn.connectors_egress['dnode'].min() def test_main_saves_outputs(config, gtfs_data_preprocessed, tmpdir): diff --git a/tests/test_data/config_demo.yaml b/tests/test_data/config_demo.yaml index 4196b30..fb049f7 100644 --- a/tests/test_data/config_demo.yaml +++ b/tests/test_data/config_demo.yaml @@ -11,11 +11,14 @@ settings: walk_distance_threshold : 2000 # m | Max walk distance in a leg walk_speed : 4.5 # kph | Walking speed crows_fly_factor : 1.3 # Conversion factor from euclidean to routed distances - max_transfer_time : 1800 # Max combined time of walking and waiting (sec) + max_transfer_time : 1800 # Max combined time of walking and waiting (sec) of a transfer max_wait : 1800 # sec | Max wait time at a stop / leg bounding_box : null epsg_centroids: 27700 # coordinate system of the centroids file. Needs to be Cartesian and in meters. - + weight_walk: 2 # value of walk time, ratio to in-vehicle time + weight_wait: 2 # value of wait time, ratio to in-vehicle time + penalty_interchange: 300 # seconds added to generalised cost for each interchange + steps: - preprocessing - connectors diff --git a/tests/test_graph.py b/tests/test_graph.py new file mode 100644 index 0000000..e1fe1b3 --- /dev/null +++ b/tests/test_graph.py @@ -0,0 +1,5 @@ +import os + +import pytest + +from gtfs_skims import graph \ No newline at end of file From cd8e47e6f5092efb5b91e2be2f7978227174ab1b Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Mon, 11 Dec 2023 21:40:24 +0000 Subject: [PATCH 15/30] edges --- gtfs_skims/connectors.py | 46 ++++++++++++++++- gtfs_skims/graph.py | 47 +++++++++++++++++- tests/conftest.py | 7 ++- .../test_data/outputs/stop_times.parquet.gzip | Bin 8296 -> 8287 bytes tests/test_data/outputs/stops.parquet.gzip | Bin 8421 -> 8417 bytes tests/test_graph.py | 30 ++++++++++- 6 files changed, 125 insertions(+), 5 deletions(-) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index f2c6d9e..a3536fd 100644 
--- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -230,6 +230,15 @@ def dcoords(self) -> np.array: def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: + """Get all transfer connectors (between stops). + + Args: + data (GTFSData): GTFS data object. + config (Config): Config object. + + Returns: + np.ndarray: [origin id, destination id, walk time, wait time] + """ time_to_distance = config.walk_speed/3.6 # km/hr to meters max_transfer_distance = config.max_transfer_time * time_to_distance max_wait_distance = config.max_wait * time_to_distance @@ -274,7 +283,18 @@ def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: return arr -def get_access_connectors(data: GTFSData, config: Config, origins: pd.DataFrame): +def get_access_connectors(data: GTFSData, config: Config, origins: pd.DataFrame) -> np.ndarray: + """Get all access connectors (between origins and stops). + + Args: + data (GTFSData): GTFS data object. + config (Config): Config object. + destinations (pd.DataFrame): Origin coordinates dataframe. + Must include 'x' and 'y' columns, providing the cartesian coordinates of the trip start points. + + Returns: + np.ndarray: [origin id, destination id, walk time, wait time] + """ time_to_distance = config.walk_speed/3.6 # km/hr to meters max_transfer_distance = config.max_transfer_time * time_to_distance max_wait_distance = config.max_wait * time_to_distance @@ -305,7 +325,18 @@ def get_access_connectors(data: GTFSData, config: Config, origins: pd.DataFrame) return arr -def get_egress_connectors(data: GTFSData, config: Config, destinations: pd.DataFrame): +def get_egress_connectors(data: GTFSData, config: Config, destinations: pd.DataFrame) -> np.ndarray: + """Get all egress connectors (between stops and destinations). + + Args: + data (GTFSData): GTFS data object. + config (Config): Config object. + destinations (pd.DataFrame): Destination coordinates dataframe. 
+ Must include 'x' and 'y' columns, providing the cartesian coordinates of the trip ends. + + Returns: + np.ndarray: [origin id, destination id, walk time, wait time] + """ time_to_distance = config.walk_speed/3.6 # km/hr to meters # get candidate connectors @@ -328,6 +359,17 @@ def get_egress_connectors(data: GTFSData, config: Config, destinations: pd.DataF def main(config: Config, data: Optional[GTFSData] = None) -> ConnectorsData: + """Get feasible connections (transfers, access, egress). + + Args: + config (Config): Config object. + data (Optional[GTFSData], optional): GTFS data object. + If not provided, reads the stored parquet files from the outputs directory. + Defaults to None. + + Returns: + ConnectorsData: Connectors object, holding the three output tables. + """ logger = get_logger(os.path.join( config.path_outputs, 'log_connectors.log')) diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index bec47c2..2d13ed0 100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -2,5 +2,50 @@ from graph_tool import Graph from graph_tool.topology import shortest_distance +import numpy as np +import pandas as pd -from gtfs_skims.utils import Config, GTFSData, get_logger \ No newline at end of file +from gtfs_skims.utils import Config, GTFSData, ConnectorsData, get_logger + + +def get_ivt_edges(stop_times: pd.DataFrame) -> pd.DataFrame: + """Get in-vehicle times between stops. + + Args: + stop_times (pd.DataFrame): The stoptimes GTFS table. 
+ + Returns: + np.ndarray: [origin id, destination id, in-vehicle time] + """ + edges_ivt = pd.Series(range(len(stop_times))) + trip_id = stop_times.reset_index()['trip_id'] + departures = stop_times.reset_index()['departure_s'] + + edges_ivt = pd.concat([ + edges_ivt, + edges_ivt.groupby(trip_id).shift(-1), + departures.groupby(trip_id).shift(-1) - departures, + ], axis=1).dropna().map(int) + edges_ivt.columns = ['onode', 'dnode', 'ivt'] + + return edges_ivt + + +def get_all_edges(gtfs_data: GTFSData, connectors_data: ConnectorsData) -> pd.DataFrame: + """Get all edges for the accessibility graph. + + Args: + gtfs_data (GTFSData): GTFS data object. + connectors_data (ConnectorsData): Connectords data object. + + Returns: + pd.DataFrame: ['onode', 'dnode', 'ivt', 'walk', 'wait', 'transfer'] + """ + edges = pd.concat([ + get_ivt_edges(gtfs_data.stop_times), + connectors_data.connectors_transfer.assign(transfer=1), + connectors_data.connectors_access, + connectors_data.connectors_egress, + ], axis=0).fillna(0).map(int) + + return edges diff --git a/tests/conftest.py b/tests/conftest.py index bc546af..824ee6e 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,7 +12,7 @@ def test_content(response): import pytest -from gtfs_skims.utils import Config, GTFSData +from gtfs_skims.utils import Config, GTFSData, ConnectorsData TEST_DATA_DIR = os.path.join(Path(__file__).parent, 'test_data') @@ -40,3 +40,8 @@ def gtfs_data(): @pytest.fixture def gtfs_data_preprocessed(): return GTFSData.from_parquet(os.path.join(TEST_DATA_DIR, 'outputs')) + + +@pytest.fixture +def connectors_data(): + return ConnectorsData.from_parquet(os.path.join(TEST_DATA_DIR, 'outputs')) diff --git a/tests/test_data/outputs/stop_times.parquet.gzip b/tests/test_data/outputs/stop_times.parquet.gzip index 29ecd67f7f10a602c6edc3eeb7076283075ac3df..2dd4231027e3f3d89f21e253de2dc415a3668ae0 100644 GIT binary patch delta 1166 zcmV;91abT5K;J;H3K9as2$Mkx7k|`CNK{cA$MI`QMYXUX2!aY~DYP3x5Zg!uL5mhm 
z8!2$I5?V;KiIhkMQ4xIL#y8@CqA=qqzD7rUi{oN^KL$}3J{S~TIpg?1e_t0@+z5gj z@8-jK|Ih!Nb0ZDI@I*!A?%y8wqF+4bPvAJ`?UPBEj79Ww zFfNbzeE68SVlV}hiQA6v@P7^Mop_ghW|9XBKEQa|(-^;SX8s>@=KQ73oAYbGq5JQP zkdM&IyRf5^_hH5VSWJ5&=1{-E`Q~5;=bC}vxu3b*!@s*SKsg)laW88_=KI`?vCOlH zi($Ep0gPka`^58*I1}+7{$U>_=)!jLQcu3F^LCxLi{#B#W!CQE9e=m*Uu%<|4*9|@ z+q!7)rrjg^d*phb>>ZF@gYxCDtQetwl>duYc2gdv-8Cd<8Sfr2&%eTYQ^a89VzJYi4+#JO6#x~UBo(R%lR*g= zf7DGV;pAaFji)>gE|hWdREi4` zkvxW+Oo@wJD1?&BL`@_jPfk6s@c~*M@Apj-ESWA z@EtB2#1wK5y3sp-CrF&dejLC##+|a>eEF7upvaSdJf&gB;( zo`;H4;j9xOE|7i6_2h9(3lm3~@5UnPm1GP1lmsjHf2#gI_dP5Q@eR5v#9p$gQtU%@ z73U>88DEEq_zzRd#gAp;SJY9zAy4CBk+@O7`_C5}$;TMXI6wSQC>nX*!aO(ne^bB0 z9_DF^#Z4S3VV_d5m2++2E)H`)Eqxlg*T3%$Wq&<(aV(ejnj_ZWmu&GeQw+=!+c1E7 zwvG3KNod1>4AGp%x$qiK(#2TDx3m5t`#Z6V`dbV^GbUq4mU7-_?qkKD)DsxD&OL18 zLY^-(-a~(O>@R(L>D%M`rn4wre=&)AR7Z(1kzyy`)fv90arlRNG~ZD*dcUv!Q73v5X02YWyJQxV80tWy90000O2(gpE85Sz?P#hHm85IB-7y)?|1Qx&q g7RDqN(g+y<7V=O$7zht}0RR91000;Wgt8iF22=$+g#Z8m delta 1180 zcmV;N1Y`T(K$EJv4f>_wp9vi2yvJlFX5l~j zIT%;Wd;z>mT*;V?S;TF}e;4?Sb}wFMp9SQ>g10b@_FTpXPt5;m&YZuTpgF(xc-{X% zoV#Q(HsU?KHuoNpm^aIX3Ik^5Q1J^UV#VamVp2KTad+kBthn8G~k zIM4lD#4x6^?oHykO`I9{1HZA4Qfx*Kd8sE~mwCI)+gb8vt1@eMfAfx8`LB6ok5@kG zlI`8J_t4%eeSLD(C;JCv*PwhnEGtH+_w#=lmEDwwXm1{pGmP&XFweildXvOqC5{i6 zM023_YCtOj8zd?c9Unb3YSI`%VpgVOHd*$VQfX7rwCZ8BXcHoe z_*PI{IiR>vHwr51LN|koPsH~V6#0E;@fSCO;Ktc}IOpE`zvrGi+!Kajw4DrDu!k7C zzYfgBGuRs|UBqP=z~K7vIN6RBSQX4Oe{Y5FUBZ6sWW1L7Md-pM#-q{0I$w+&K>wb) z&Lzn8MA>{#-nlQkh(n3L6E|Y!U0Kii0KO$(NSw+(-gxu=ht=PsK1XW4Jca&G)Jy!Z zK$fGWP_7aCnEwsaF&Z;I%8Ook1tZBD*g}3jPY%82{&VF|;w4OA-dVixM#eM#e}{De z3?sjVrL42Qlfzi|UM_x+f2nH-r?8guMbM|3J^j0uZ|?7eM>f3VUUTFZy!=9Lx@A}xNKTF`u*BR|bR z)~Q1wSFuFA#`x*~=07Z6EkBtX_kSXl6RzR7`;ugvdsD_!yeMAIdi|fAV?BGF<7po9 z9va;H6W_u)?Xz6r9o))fzn=~knddX#EQIek$726oX+5j0vz#Y++&(U`ojvN(qFxo( zBY(>^_w0&uzZ_0*uGzZ8OXX*{=Z3m4+3q|$-S6&js5jiyA9fFj4~h?Y?|;ML#|Pp4 zk#OKqxOp_ZI;Q`Ldo~^pYLAMyKMtpi?;pvYf7bsdt)t934l-Z7bT~V|jW^{L7{AW` z?%D5F`_JnNm#IsWI*qll)qA`fE^P^)s_OxMQ1`FSWkLUX`A3XTo3G3K|GwYTs^CD^ 
z1pojQ02Qtm73v2p74iZV0F#Cn5`SH2&2bn9@G~w}s9mgaC98S6n6-q+Hk)}_I&IAm zyE2Z=hAkU5ZYakUrW7|s6D{)+a%ClM5%!{OXR;X;dEcf8&0IX&Td?|(JQkG$pkO3Z19i#gsA-fee&d&A>g&=&sQ6PCAz7g(m= zmU0f<&J@dbhshT2!AXAG6>ipl(fwa|ewtU*XR?M#?%<`?+m&Nd&1cNKN*lu2Tg-{~`GH9;V+pS|IsZoI%U5+_1q(KY^&7&={%EVm{--9cP~YS~ zDyYr*M-+?7!)Lw?b0000O2!ylh8~X&#zdcg` delta 1127 zcmV-t1ep8bLFGZPWfcO72$K*C7L$Dy3V%XUEi4Fvpn_To?IZ-TjYJT%XpwECAhQx$ zNVJKRD1%T5vS?+}gD&(Sd{vsS3^5H|SX!16<)Q~tRI97EGU|LSuK(s^=A4;-=FCh^ zB9Zu!omKy9S>o?knhuXGgu9u;)O=W@{hc+&_po*`EKKD7b4I&?<*ec$H_5-HJ%9T< z9OZqEbKd{%GQ|S!Vl@|kh99&$%u~P%`pa3#G@Im|*El1s#BwO8^Ql91|@iNxw|Lh#=_{ce) z;bHHg-n~DW3g@)Xaiw=~b0PEnbgPm000#L6|NT*><26r@{u+;hM8 zedlr~ClZO#NjanCIf?P#19oy6hswex`64rXTM^#pWnP`A>-+9GbNP%9t$+9HKgA}F zSubISed*HhIR~xJs0x2ohj&(nD^`V%nTXZ-02kdkynF`+Z-5IbG)>j&y{#wmHA{@DS&34gaLV;{Fl0YSyud=i0LO75JVcm#8=Dd*M4$ za+mcX{Uhe{#=HvaomYc7ae%K`$EBRdzNGuO$@%hWZCK8{HDUeg@PE8}bel71sp-02Klm zoJa<9V`yb7zhw~0RR91000;Wg|p@x`ve87M|S`K diff --git a/tests/test_graph.py b/tests/test_graph.py index e1fe1b3..a4eb8fd 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -1,5 +1,33 @@ import os +import numpy as np +import pandas as pd import pytest -from gtfs_skims import graph \ No newline at end of file +from gtfs_skims import graph + + +def test_get_ivt_times(): + stop_times = pd.DataFrame({ + 'trip_id': [0, 1, 0, 1], + 'departure_s': [100, 105, 120, 150] + }) + ivt_edges = graph.get_ivt_edges(stop_times) + expected = np.array([ + [0, 2, 20], + [1, 3, 45] + ]) + np.testing.assert_equal(ivt_edges.values, expected) + + +def test_get_all_edges(gtfs_data_preprocessed, connectors_data): + edges = graph.get_all_edges(gtfs_data_preprocessed, connectors_data) + + len_expected = len(gtfs_data_preprocessed.stop_times) - gtfs_data_preprocessed.stop_times['trip_id'].nunique() + len_expected += len(connectors_data.connectors_transfer) + len_expected += len(connectors_data.connectors_access) + len_expected += len(connectors_data.connectors_egress) + assert len(edges) == len_expected + 
+ assert list(edges.columns) == [ + 'onode', 'dnode', 'ivt', 'walk', 'wait', 'transfer'] From 18410ab8f89a008d8d02f28cffb0de05c736ec72 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Tue, 12 Dec 2023 14:14:33 +0000 Subject: [PATCH 16/30] shortest distances --- .gitignore | 2 +- gtfs_skims/graph.py | 90 +++++++++++++++++- .../outputs/connectors_access.parquet.gzip | Bin 0 -> 1600 bytes .../outputs/connectors_egress.parquet.gzip | Bin 0 -> 2385 bytes .../outputs/connectors_transfer.parquet.gzip | Bin 0 -> 1362 bytes tests/test_graph.py | 68 ++++++++++++- 6 files changed, 157 insertions(+), 3 deletions(-) create mode 100644 tests/test_data/outputs/connectors_access.parquet.gzip create mode 100644 tests/test_data/outputs/connectors_egress.parquet.gzip create mode 100644 tests/test_data/outputs/connectors_transfer.parquet.gzip diff --git a/.gitignore b/.gitignore index 56c75cc..0cb40c9 100755 --- a/.gitignore +++ b/.gitignore @@ -40,5 +40,5 @@ mike-*.yml .ipynb_checkpoints sandbox.py -tests/test_data/outputs/ +# tests/test_data/outputs/ tests/test_data/outputs/*log \ No newline at end of file diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index 2d13ed0..3db94b2 100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -1,4 +1,6 @@ -import os +from functools import partial +import multiprocessing +from typing import Optional from graph_tool import Graph from graph_tool.topology import shortest_distance @@ -49,3 +51,89 @@ def get_all_edges(gtfs_data: GTFSData, connectors_data: ConnectorsData) -> pd.Da ], axis=0).fillna(0).map(int) return edges + + +def add_gc(edges: pd.DataFrame, config: Config) -> pd.DataFrame: + """Calculate generalised time and add it as a column to the 'edges' table. + + Args: + edges (pd.DataFrame): Edges dataframe. Should include these columns: + ['ivt', 'walk', 'wait', 'transfer'] + config (Config): Config object. + + Returns: + pd.DataFrame: Edges dataframe, with the generalised time ("gc") column included. 
+ """ + edges['gc'] = edges['ivt'] +\ + edges['walk'] * config.weight_walk +\ + edges['wait'] * config.weight_wait +\ + edges['transfer'] * config.penalty_interchange + + # adding unweighted time as well + edges['time'] = edges[['ivt', 'walk', 'wait']].sum(1) + + return edges['gc'] + + +def build_graph( + edges: pd.DataFrame, + vars=['ivt', 'walk', 'wait', 'time', 'gc'] +) -> Graph: + """Build a network graph from the edges table. + + Args: + edges (pd.DataFrame): Edges table. Should include the 'gc' and 'time' columns from the 'add_gc' method. + vars (list): list of variables to include in the graph as edge properties. + + Returns: + Graph: Connected GTFS graph + """ + eprops = [(x, 'int') for x in vars] + g = Graph( + edges[['onode', 'dnode']+vars].values, + hashed=False, + eprops=eprops + ) + return g + + +def get_shortest_distances_single( + graph: Graph, + onode: int, + dnodes: list[int], + max_dist: Optional[float] = None, + attribute: str = 'gc' +) -> np.ndarray: + d = shortest_distance(graph, onode, dnodes, + weights=graph.edge_properties[attribute], dense=False, + max_dist=max_dist, directed=True) + d = np.concatenate([np.array([onode]), d]) + + return d + + +def get_shortest_distances( + graph: Graph, + onodes: list[int], + dnodes: list[int], + max_dist: Optional[float] = None, + attribute: str = 'gc' +) -> pd.DataFrame: + n_cpus = multiprocessing.cpu_count() - 1 + dist_wrapper = partial(get_shortest_distances_single, graph, dnodes=dnodes, + max_dist=max_dist, attribute=attribute) + with multiprocessing.Pool(n_cpus) as pool_obj: + dists = pool_obj.map(dist_wrapper, onodes) + + dists = np.array(dists) + dists = dists[dists[:, 0].argsort()] # sort by source node + + # convert to dataframe and reindex + dists = pd.DataFrame(dists[:, 1:], index=dists[:, 0], columns=dnodes) + dists = dists.loc[onodes] + + return dists + + +def main(): + pass diff --git a/tests/test_data/outputs/connectors_access.parquet.gzip 
b/tests/test_data/outputs/connectors_access.parquet.gzip new file mode 100644 index 0000000000000000000000000000000000000000..73e66e9f735a1dcad52c7bbc804b77540dd99f04 GIT binary patch literal 1600 zcmWG=3^EjD5S_v#nxG?U2V}8{axgH+cLNC!_`mk>MqUO67T1H{eVyJU{@)lP)p<@e z-Qgr_waxj*8U4RRn7V+*dLLX=O=8Syi#Qsild9nU($f7xa6&cq^}L0>bb&*bO&5ZOG>kAkgygqkwn; zo8aCs&yuLMULiu;3wc|;Uzqr-2(fOJ{$OtQbJE#2eG{ixUYlGMq7l2&`JIRKb>X!W zvqbJlt4RE`JfZimjOC4>&TpS|$5VC6YW%gC3(OxL^!l&*|2gy3oip|#9P$8%L(Tvl z;vnGmuwJ~?O+V06rs7e|8|(xo$rul| z;^d6f+(ckRi86^gh%!mA=I7<7q^e56`6;M;mh!}$Y=ktBn^^+nirGoZNcyO8$cQpY ziZVz-jS*!LvlH9FB-X+x$sopIBf+A~$iN`Mq5vet7#JjFfN>492N>yUJ&bUjU<<)I zcQJ|W0_p5w1ZtfMR3^bv4kXdE9%F)O1-lKVa}pC!=P@RrP7zj+&h3~wZ?GWg%q)TF zYy;`M!2;Cj57r5CAgY~W&p5^GfbN%ID@e>sNi3G=WUE$6&d({$&5O^>OG&LrEmpEp zh^|&j%S_EliO);SO$GAvN^^2_6qIsPOA=EOOA-+xsPcKGxdoN+C6xsrJxcjWS*gh- zNVQ(96~;zb8~~N5 z)loq6rxG{@@mmI0La}9_^oZXsm6g1HxTdQZNXACk8=&F*; x5};#{ZH=vEkkCVNvu;6UNk)F20@MHnxT6&`l1ejkQWOj{89G@Q7y^KG2LP8d%6aYv6ZtF>Ol|&6#)}MERl2*5yXNsop%4&`MrJbd*Aol@BMZ*$YYHQ z3P9CkP*S)fst^vX;V1+ESc##V?$WNR3+5a%0K8vbep7$7p4n8~*%{R&V>VSWn*xdj z#gv=AozJUzJ?CQs-)mB*Sin5iY#`@)=2>{Ip5}&3&0gadJ8cCV8%?@Inj-bb=9GNki!^uo##^c83EOl67qWf@))Iipi=Uz z+UKv8%Q~El=(!UmcZpu9kDM_C8+zUW$y1_t<)ir+rz!L^{*tO=yYr8wb`Y0GJaUv) zVx{*NOCK0IR^T0v&bU_VNO}y43NqGS3$1?`O0jzbrZjGynSt)^dUJcpv5fUF@{%vg48Y>$9uYwm?=Z?>Nb)ZWRrL`&BU9s zX}pGJQlHEy);_Hp9oTL5Yd6+cxGPgAY7nN}6z=JJe>cH3M;fWPpWyaf8lx!JxZaj> z6c03RT50lO<{JNtzN8L`b=E`Ax!iQ|;?}c($llXsWzc?NhM#DBr0J*kma?J3dir>B z!9qV|h+(ta(k0vA05IsB!RXF)ru$B~gG0i?3}b^lkEZN)QXPB(x_fP3(Uuz&VpU5q z2Hu5)8l)ml-g7N!PkWkuY0j*!k4vk3f3*Ky#ldZ{@j$B%$Bve|yKU*vm`&I0DMSc05zXTZ& zd_=%fuNW{8T&jC94qL2(trIWXe7Y>nb+c?iRHoIFoD+@vCcS^$VXv)hj*G_z_hZ8~ zW|8|Z=g-?`qF;&jd|j9vN9dh++v(O+W7<_k=5LdGX3frv?^w_pTCnMKyX52J=#v`nUXRyPsFnSV z^ZN<1w#l|G({Dx;sV8bT6pAa=3*S~%l+Dx}-50q;tawX3KDDok=2R3R|7iD)ySmgS ztMtt=;i2m)%Ca2JX}5&5frS>2DiX_HZwlP@A>8qGQmZIZ25VE5gb?rc-qMEtDou=Q3U%e# zxb>7wzpnE3>OPZ}MXcO*>nG1VUv((TS0#6Suy~VSyHtL|{U>s1*7NLxT?uJr)cFku 
zo=KI8<4H-{q@)<-t5(YzP6;V-a*t)b$Aee!x{_YodVkFTa5V?}WO#;)&?Hs6*s54J;(HpFZs!+Be_f-D;J#X=dP+);+;aU3>>4$iO` zis>U`SPYH1Z8!~u3_cW^7s4@EjQ>mo7G;P*0hqx)C>WG5AHbL^;TXUiu^0`TY5)Le znC`=94geS|bO0eHD4MB(4?*>m!%zbVs)0jTcRiK}-M_+i57Fx74MF#ghoId$S~uvh zqhV41q*a@Irqyrd6I6pSPk@kW69}o3I+|c}RV1`GXbWg~G>uKC@zDF>@%Swqrhvt! zGT3xb2=ed?abfZJ2nNWcQ`s~Y2*qpxlj(@Vvp_zLPUF*tpN!121+3^;DnB+_hrx5g zqrfeEJcRp1HI!g6gdlwmgUtscK`uUV6IAg@!D!TYy|ub7W3msIj1(Y%C`iOn1Pmq; z9dTdn6R$V%+hju}|CvlTl;7t1T=73pG4xK1{SI|ULMP^5Qt8d(f;2vd3wa)oXEMUM zG;XZU{|Fk7A5G(K7l8W4J3uav!C~t%L}wz2;Nnbh!9y0{^@nb>TfdTeoO!W literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/connectors_transfer.parquet.gzip b/tests/test_data/outputs/connectors_transfer.parquet.gzip new file mode 100644 index 0000000000000000000000000000000000000000..0d024133443bd21d0409710b9277e4dbcc9489de GIT binary patch literal 1362 zcmb`H&ui2`6vt;{3=1k^Au*8!!%&Nb?Xo{^rBo{(6h(>`4_Y>p&1^e1nZ#tGUDg#Z zf}r0013Y-~B0YHUKM?va*n^(Dde@iPG`p$og6JNGnY{OXpKs>9?E2c>mX7qge!Z@% zu+j7?LKk1blKp=5T*q<9H{v<_nrkBT<^0EY1_Y#Jb%x~g+4(h7Lvydstrq#S5I_5_ z^PYOoPoTwnS3VSyYcmi}&&LvbkfxnZLb2lTTt6nM}OjWl_?z)xy}RJt^&F_ zsrv!cGSB#=7%KQSwTN(!B<4$PhmcJlpqT8$@pqVg1DTn>M9JpGl!!2f zo)59l+?d3Jtp9zIh=|0GQYs5Srg6eTo<-WtcBj#5Hd+{3faTbAOB1jS&$7gr@J%XR zmF6%JBo+{Ff;tg2f^AD~!))7@6P3^0G_}!cbh2V#N-zOlrlA`nWHzQOFGT~f6>>W- XU>Bb4%Wlei&u%WFXDUML@NfDXkdt5% literal 0 HcmV?d00001 diff --git a/tests/test_graph.py b/tests/test_graph.py index a4eb8fd..373e77f 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -1,12 +1,43 @@ import os +from graph_tool import Graph import numpy as np import pandas as pd import pytest +from unittest.mock import Mock from gtfs_skims import graph +@pytest.fixture() +def mock_config(mocker): + mock = Mock() + mock.weight_wait = 3 + mock.weight_walk = 2 + mock.penalty_interchange = 600 + return mock + + +@pytest.fixture() +def small_graph() -> Graph: + edges = pd.DataFrame({ + 'onode': [0, 0, 1, 2], + 'dnode': [1, 2, 3, 3], + 'gc': [10, 20, 15, 4] + }) + return 
graph.build_graph(edges, vars=['gc']) + + +@pytest.fixture() +def small_graph_birectional() -> Graph: + edges = pd.DataFrame({ + 'onode': [0, 0, 1, 2, 1, 2, 3, 3], + 'dnode': [1, 2, 3, 3, 0, 0, 1, 2], + 'gc': [10, 20, 15, 4, 10, 20, 15, 4] + }) + return graph.build_graph(edges, vars=['gc']) + + def test_get_ivt_times(): stop_times = pd.DataFrame({ 'trip_id': [0, 1, 0, 1], @@ -23,11 +54,46 @@ def test_get_ivt_times(): def test_get_all_edges(gtfs_data_preprocessed, connectors_data): edges = graph.get_all_edges(gtfs_data_preprocessed, connectors_data) - len_expected = len(gtfs_data_preprocessed.stop_times) - gtfs_data_preprocessed.stop_times['trip_id'].nunique() + # all connections are included + len_expected = len(gtfs_data_preprocessed.stop_times) - \ + gtfs_data_preprocessed.stop_times['trip_id'].nunique() len_expected += len(connectors_data.connectors_transfer) len_expected += len(connectors_data.connectors_access) len_expected += len(connectors_data.connectors_egress) assert len(edges) == len_expected + # all variables are included assert list(edges.columns) == [ 'onode', 'dnode', 'ivt', 'walk', 'wait', 'transfer'] + + +def test_calculate_gc(mock_config): + edges = pd.DataFrame({ + 'ivt': [100, 200], + 'walk': [30, 10], + 'wait': [10, 5], + 'transfer': [0, 1] + }) + graph.add_gc(edges, mock_config) + assert list(edges['gc']) == [190, 835] + + +def test_get_shortest_distance_single(small_graph): + dists = graph.get_shortest_distances_single(small_graph, 0, [3, 2, 1, 0]) + expected = np.array([24, 20, 10, 0]) + assert dists[0] == 0 # the first value is the source + np.testing.assert_equal(dists[1:], expected) + + +def test_get_distance_matrix(small_graph_birectional): + distmat = graph.get_shortest_distances( + small_graph_birectional, [0, 1, 2], [1, 2]) + expected = np.array([ + [10, 20], + [0, 19], + [19, 0] + ]) + assert list(distmat.index) == [0, 1, 2] + assert list(distmat.columns) == [1, 2] + + np.testing.assert_equal(distmat.values, expected) From 
0bf9fd9b563f5380fccfd15ea90530b5af1b0727 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Tue, 12 Dec 2023 15:17:56 +0000 Subject: [PATCH 17/30] complete workflow --- .gitignore | 2 +- gtfs_skims/cli.py | 12 +++++-- gtfs_skims/graph.py | 83 +++++++++++++++++++++++++++++++++++++++++++-- tests/test_cli.py | 6 +++- tests/test_graph.py | 6 ++++ 5 files changed, 102 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 0cb40c9..56c75cc 100755 --- a/.gitignore +++ b/.gitignore @@ -40,5 +40,5 @@ mike-*.yml .ipynb_checkpoints sandbox.py -# tests/test_data/outputs/ +tests/test_data/outputs/ tests/test_data/outputs/*log \ No newline at end of file diff --git a/gtfs_skims/cli.py b/gtfs_skims/cli.py index 110072f..1632f43 100755 --- a/gtfs_skims/cli.py +++ b/gtfs_skims/cli.py @@ -5,6 +5,7 @@ from gtfs_skims.preprocessing import main as main_preprocessing from gtfs_skims.connectors import main as main_connectors +from gtfs_skims.graph import main as main_graph from gtfs_skims.utils import Config @@ -27,8 +28,15 @@ def run(config_path: str, output_directory_override: Optional[str] = None): config.path_outputs = output_directory_override steps = config.steps + gtfs_data = None + connectors_data = None + if 'preprocessing' in steps: - main_preprocessing(config=config) + gtfs_data = main_preprocessing(config=config) if 'connectors' in steps: - main_connectors(config=config) + connectors_data = main_connectors(config=config, data=gtfs_data) + + if 'graph' in steps: + main_graph(config=config, gtfs_data=gtfs_data, + connectors_data=connectors_data) diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index 3db94b2..68d75b5 100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -1,5 +1,6 @@ from functools import partial import multiprocessing +import os from typing import Optional from graph_tool import Graph @@ -72,7 +73,7 @@ def add_gc(edges: pd.DataFrame, config: Config) -> pd.DataFrame: # adding unweighted time as well edges['time'] = 
edges[['ivt', 'walk', 'wait']].sum(1) - return edges['gc'] + return edges def build_graph( @@ -104,6 +105,18 @@ def get_shortest_distances_single( max_dist: Optional[float] = None, attribute: str = 'gc' ) -> np.ndarray: + """Get shortest distances from a single origin. + + Args: + graph (Graph): GTFS graph. + onode (int): Source node. + dnodes (list[int]): Destination nodes. + max_dist (Optional[float], optional): Maximum search distance. Defaults to None. + attribute (str, optional): Edge weights attribute. Defaults to 'gc'. + + Returns: + np.ndarray: Shortest distances. The first value is the source node. + """ d = shortest_distance(graph, onode, dnodes, weights=graph.edge_properties[attribute], dense=False, max_dist=max_dist, directed=True) @@ -119,6 +132,20 @@ def get_shortest_distances( max_dist: Optional[float] = None, attribute: str = 'gc' ) -> pd.DataFrame: + """Get shortest distances from a set of origins to a set of destinations. + + Args: + graph (Graph): GTFS graph. + onode (int): Source nodes. + dnodes (list[int]): Destination nodes. + max_dist (Optional[float], optional): Maximum search distance. Defaults to None. + attribute (str, optional): Edge weights attribute. Defaults to 'gc'. + + Returns: + pd.DataFrame: Shortest distances matrix. + The dataframe indices are the origin nodes, + and the column indices are the destination nodes. 
+ """ n_cpus = multiprocessing.cpu_count() - 1 dist_wrapper = partial(get_shortest_distances_single, graph, dnodes=dnodes, max_dist=max_dist, attribute=attribute) @@ -135,5 +162,55 @@ def get_shortest_distances( return dists -def main(): - pass +def main( + config: Config, + gtfs_data: Optional[GTFSData] = None, + connectors_data: Optional[ConnectorsData] = None +) -> pd.DataFrame: + # read + logger = get_logger(os.path.join( + config.path_outputs, 'log_graph.log')) + + logger.info('Reading files...') + if gtfs_data is None: + gtfs_data = GTFSData.from_parquet(path=config.path_outputs) + if connectors_data is None: + connectors_data = ConnectorsData.from_parquet(path=config.path_outputs) + origins = pd.read_csv(config.path_origins, index_col=0) + destinations = pd.read_csv(config.path_destinations, index_col=0) + + # graph + logger.info('Building graph...') + edges = get_all_edges(gtfs_data, connectors_data) + edges = add_gc(edges=edges, config=config) + g = build_graph(edges=edges) + + # shortest paths + logger.info('Calculating shortest distances...') + origins['idx'] = range(len(origins)) + origins['idx'] += len(gtfs_data.stop_times) + destinations['idx'] = range(len(destinations)) + destinations['idx'] += (len(gtfs_data.stop_times)+len(origins)) + + onodes_graph = list(origins[origins['idx'].isin(edges['onode'])]['idx']) + dnodes_graph = list( + destinations[destinations['idx'].isin(edges['dnode'])]['idx']) + distmat = get_shortest_distances( + g, onodes=onodes_graph, dnodes=dnodes_graph) + + # map labels + distmat.index = distmat.index.map( + origins.reset_index().set_index('idx')['name'] + ) + distmat.columns = distmat.columns.map( + destinations.reset_index().set_index('idx')['name'] + ) + + # diagonal infilling + + # save + path = os.path.join(config.path_outputs, 'skims.parquet.gzip') + logger.info(f'Saving results to {path}...') + distmat.to_parquet(path, compression='gzip') + + return distmat diff --git a/tests/test_cli.py b/tests/test_cli.py index 
a8540fd..a0aa408 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -42,4 +42,8 @@ def test_run_steps_saves_outputs(tmpdir): for x in ['transfer', 'access', 'egress']: assert os.path.exists( os.path.join(tmpdir, f'connectors_{x}.parquet.gzip') - ) \ No newline at end of file + ) + + assert os.path.exists( + os.path.join(tmpdir, f'skims.parquet.gzip') + ) diff --git a/tests/test_graph.py b/tests/test_graph.py index 373e77f..a1be1e6 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -97,3 +97,9 @@ def test_get_distance_matrix(small_graph_birectional): assert list(distmat.columns) == [1, 2] np.testing.assert_equal(distmat.values, expected) + + +def test_correct_labels(config, gtfs_data_preprocessed, connectors_data, tmpdir): + config.path_outputs = tmpdir + distmat = graph.main(config=config, gtfs_data=gtfs_data_preprocessed, + connectors_data=connectors_data) From e3f1cbb7cfa7da0ab4e346911eb03627aaf9bfd8 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Tue, 12 Dec 2023 15:39:32 +0000 Subject: [PATCH 18/30] matrix expansion --- gtfs_skims/graph.py | 15 +++++++++------ tests/test_graph.py | 7 ++++++- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index 68d75b5..af8b862 100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -198,19 +198,22 @@ def main( distmat = get_shortest_distances( g, onodes=onodes_graph, dnodes=dnodes_graph) + # expand to the full OD space + distmat_full = pd.DataFrame( + np.inf, index=origins['idx'], columns=destinations['idx']) + distmat_full.loc[distmat.index, distmat.columns] = distmat.values + # map labels - distmat.index = distmat.index.map( + distmat_full.index = distmat_full.index.map( origins.reset_index().set_index('idx')['name'] ) - distmat.columns = distmat.columns.map( + distmat_full.columns = distmat_full.columns.map( destinations.reset_index().set_index('idx')['name'] ) - # diagonal infilling - # save path = 
os.path.join(config.path_outputs, 'skims.parquet.gzip') logger.info(f'Saving results to {path}...') - distmat.to_parquet(path, compression='gzip') + distmat_full.to_parquet(path, compression='gzip') - return distmat + return distmat_full diff --git a/tests/test_graph.py b/tests/test_graph.py index a1be1e6..6667e90 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -100,6 +100,11 @@ def test_get_distance_matrix(small_graph_birectional): def test_correct_labels(config, gtfs_data_preprocessed, connectors_data, tmpdir): + origins = pd.read_csv(config.path_origins, index_col=0) + destinations = pd.read_csv(config.path_destinations, index_col=0) config.path_outputs = tmpdir distmat = graph.main(config=config, gtfs_data=gtfs_data_preprocessed, - connectors_data=connectors_data) + connectors_data=connectors_data) + + assert list(distmat.index) == list(origins.index) + assert list(distmat.columns) == list(destinations.index) From 40ff44f405075e60febfea12d09f2c4fce064724 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Tue, 12 Dec 2023 23:07:25 +0000 Subject: [PATCH 19/30] docs --- docs/index.md | 6 +++++- docs/methodology.md | 24 +++++++++++++++++++++++ docs/run.md | 48 +++++++++++++++++++++++++++++++++++++++++++++ gtfs_skims/graph.py | 12 ++++++++---- mkdocs.yml | 2 ++ 5 files changed, 87 insertions(+), 5 deletions(-) create mode 100644 docs/methodology.md create mode 100644 docs/run.md diff --git a/docs/index.md b/docs/index.md index 8d4a99a..6010ddb 100755 --- a/docs/index.md +++ b/docs/index.md @@ -1 +1,5 @@ ---8<-- "README.md:docs" \ No newline at end of file +--8<-- "README.md:docs" + +# Argo + +Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. 
\ No newline at end of file diff --git a/docs/methodology.md b/docs/methodology.md new file mode 100644 index 0000000..ea87663 --- /dev/null +++ b/docs/methodology.md @@ -0,0 +1,24 @@ +# Methodology + +Argo calculates generalised time matrices between a set of origin and destination points. + +Generalised time is defined as follows: + +$$ +gc = ivt + \beta_{wait} \cdot wait\_time + \beta_{walk} \cdot walk\_time + \beta_{interchange\_penalty} \cdot n\_transfers +$$ + +Some example values for the leg component weights are: + +$$ +\beta_{wait} = \beta_{walk} = 2-3 +$$ + +and + +$$ +\beta_{\text{interchange\_penalty}} = 5 \text{ to } 10 \text{ minutes} +$$ + +Walk distance is calculated as the crow's fly distance between two points, multiplied by a factor specified in the config file (typically ~1.3). + diff --git a/docs/run.md b/docs/run.md new file mode 100644 index 0000000..bfe1eb6 --- /dev/null +++ b/docs/run.md @@ -0,0 +1,48 @@ +# Running Argo + +To run argo simply type this command on the command line: +``` +argo run +``` +, where is the path to the config yaml file. + +An example config file is shown below: +``` +paths: + path_gtfs: ./tests/test_data/iow-bus-gtfs.zip + path_outputs: ./tests/test_data/outputs + path_origins: ./tests/test_data/centroids.csv # path to the origin points + path_destinations: ./tests/test_data/centroids.csv # path to the destination points + +settings: + calendar_date : 20190515 # yyyymmdd | Date for filtering the GTFS file. + start_s : 32400 # sec | Start time of the journey. + end_s : 41400 # sec | Max end time of a journey. 
+ walk_distance_threshold : 2000 # m | Max walk distance in a leg + walk_speed : 4.5 # kph | Walking speed + crows_fly_factor : 1.3 # Conversion factor from euclidean to routed distances + max_transfer_time : 1800 # Max combined time of walking and waiting (sec) of a transfer + max_wait : 1800 # sec | Max wait time at a stop / leg + bounding_box : null + epsg_centroids: 27700 # coordinate system of the centroids file. Needs to be Cartesian and in meters. + weight_walk: 2 # value of walk time, ratio to in-vehicle time + weight_wait: 2 # value of wait time, ratio to in-vehicle time + penalty_interchange: 300 # seconds added to generalised cost for each interchange + +steps: + - preprocessing + - connectors + - graph +``` + +To run the example provided by the repo, use: +``` +argo run ./tests/test_data/config_demo.yaml +``` + +The time matrices will be saved in the `output_path` directory defined in the config file, in the `skims.parquet.gzip` file. An easy way to read the file is with pandas: +``` +import pandas as pd +df = pd.read_parquet('/skims.parquet.gzip') +df +``` \ No newline at end of file diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index af8b862..2965936 100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -192,11 +192,11 @@ def main( destinations['idx'] = range(len(destinations)) destinations['idx'] += (len(gtfs_data.stop_times)+len(origins)) - onodes_graph = list(origins[origins['idx'].isin(edges['onode'])]['idx']) - dnodes_graph = list( + onodes_scope = list(origins[origins['idx'].isin(edges['onode'])]['idx']) + dnodes_scope = list( destinations[destinations['idx'].isin(edges['dnode'])]['idx']) distmat = get_shortest_distances( - g, onodes=onodes_graph, dnodes=dnodes_graph) + g, onodes=onodes_scope, dnodes=dnodes_scope) # expand to the full OD space distmat_full = pd.DataFrame( @@ -211,9 +211,13 @@ def main( destinations.reset_index().set_index('idx')['name'] ) + # infill intra_zonal + distmat_full = distmat_full.\ + apply(lambda x: 
np.where(x.name == x.index, np.nan, x), axis=0) + # save path = os.path.join(config.path_outputs, 'skims.parquet.gzip') logger.info(f'Saving results to {path}...') - distmat_full.to_parquet(path, compression='gzip') + distmat_full.to_parquet(path, compression='gzip', index=True) return distmat_full diff --git a/mkdocs.yml b/mkdocs.yml index 1064385..235e7ca 100755 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -2,6 +2,8 @@ site_name: gtfs-skims nav: - Home: index.md - Installation: installation.md + - Running the tool: run.md + - Methodology: methodology.md - Contributing: contributing.md - Changelog: CHANGELOG.md - Reference: From ddc4dfd85db9bd719c4f997ea404dc008a6edf77 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Wed, 13 Dec 2023 09:36:41 +0000 Subject: [PATCH 20/30] docs --- docs/index.md | 2 +- gtfs_skims/graph.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/index.md b/docs/index.md index 6010ddb..50f4372 100755 --- a/docs/index.md +++ b/docs/index.md @@ -2,4 +2,4 @@ # Argo -Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. \ No newline at end of file +Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. By applying appropriate simplifications on the GTFS dataset, the library is able to calculate such matrices at scale: for example, it was possible to calculate an MSOA-to-MSOA matrix for England and Wales in ~1 hour (with a relatevile large machine). 
\ No newline at end of file diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index 2965936..1ed528e 100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -195,8 +195,10 @@ def main( onodes_scope = list(origins[origins['idx'].isin(edges['onode'])]['idx']) dnodes_scope = list( destinations[destinations['idx'].isin(edges['dnode'])]['idx']) + maxdist = config.end_s - config.start_s distmat = get_shortest_distances( - g, onodes=onodes_scope, dnodes=dnodes_scope) + g, onodes=onodes_scope, dnodes=dnodes_scope, + max_dist=maxdist) # expand to the full OD space distmat_full = pd.DataFrame( @@ -214,6 +216,7 @@ def main( # infill intra_zonal distmat_full = distmat_full.\ apply(lambda x: np.where(x.name == x.index, np.nan, x), axis=0) + distmat_full = distmat_full.map(lambda x: np.where(x>=maxdist, np.inf, x)) # save path = os.path.join(config.path_outputs, 'skims.parquet.gzip') From 4638c03e2844d7344f7026451403c67e15f2a158 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Wed, 13 Dec 2023 09:37:31 +0000 Subject: [PATCH 21/30] release date --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e2a5dc..7e5d2c2 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,7 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed -## [v0.1.0] - 2023-11-28 +## [v0.1.0] - 2023-12-13 Initial release. From 68ca4a78465de6ecef07210d6e9fd8e2f0b2d32a Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Wed, 13 Dec 2023 09:50:18 +0000 Subject: [PATCH 22/30] more docs --- docs/methodology.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/methodology.md b/docs/methodology.md index ea87663..d8261e7 100644 --- a/docs/methodology.md +++ b/docs/methodology.md @@ -22,3 +22,16 @@ $$ Walk distance is calculated as the crow's fly distance between two points, multiplied by a factor specified in the config file (typically ~1.3). 
+The library creates a graph representation of the GTFS dataset, where the edges represent vehicle movements or connections (access/egress/transfer legs). It then applies a shortest-paths algorithm, using generalised time as edge weights.
+
+To achieve high performance, the user can limit the search space by:
+* selecting a time scope and maximum travel time
+* selecting a specific day
+* selecting a maximum walk, wait and transfer time for legs
+* applying a spatial bounding box
+
+We further improve performance by:
+* using K-dimensional trees to organise spatial data
+* using the efficient graph-tool library to calculate shortest distances
+* parallelising the shortest distances calculation, and vectorising data transformation tasks
+* saving files to a compressed parquet format
\ No newline at end of file

From fd44fd1321df15ecd660d6bd81c654444f3fa92c Mon Sep 17 00:00:00 2001
From: "Theodore.Chatziioannou"
Date: Wed, 13 Dec 2023 10:11:09 +0000
Subject: [PATCH 23/30] remove typing.Self references

---
 gtfs_skims/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py
index 0f796c1..2f28821 100644
--- a/gtfs_skims/utils.py
+++ b/gtfs_skims/utils.py
@@ -4,7 +4,7 @@
 import logging
 import os
 from pathlib import Path
-from typing import Optional, Self
+from typing import Optional
 
 import yaml
 from zipfile import ZipFile
@@ -207,7 +207,7 @@ def __repr__(self) -> str:
 @dataclass
 class Data:
     @classmethod
-    def from_gtfs(cls, path_gtfs: str) -> Self:
+    def from_gtfs(cls, path_gtfs: str) -> Data:
         """Load GTFS tables from a standard zipped GTFS file.
 
         Args:
@@ -224,7 +224,7 @@ def from_gtfs(cls, path_gtfs: str) -> Self:
         return cls(**data)
 
     @classmethod
-    def from_parquet(cls, path: str) -> Self:
+    def from_parquet(cls, path: str) -> Data:
         """Construct class from pre-processed GTFS tables in Parquet format.
Args: From 4b3476485fedd1e45a355ecaab66784a9e955c74 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Wed, 13 Dec 2023 10:21:47 +0000 Subject: [PATCH 24/30] remove windows support --- .github/workflows/pr-ci.yml | 2 +- docs/installation.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-ci.yml b/.github/workflows/pr-ci.yml index a2aecaa..d26a9f3 100755 --- a/.github/workflows/pr-ci.yml +++ b/.github/workflows/pr-ci.yml @@ -16,7 +16,7 @@ jobs: test: strategy: matrix: - os: [windows-latest, ubuntu-latest, macos-latest] + os: [ubuntu-latest, macos-latest] py3version: ["9", "11"] fail-fast: false uses: arup-group/actions-city-modelling-lab/.github/workflows/python-install-lint-test.yml@main diff --git a/docs/installation.md b/docs/installation.md index 3c1d127..876e1f6 100755 --- a/docs/installation.md +++ b/docs/installation.md @@ -1,6 +1,8 @@ # Installation +Note: this library only supports Unix-based systems (ie Ubuntu/macOS). If you wish to use it on Windows please use the Windows Subsystem for Linux. 
+ ## Setting up a user environment As a `gtfs_skims` user, it is easiest to install using the [mamba](https://mamba.readthedocs.io/en/latest/index.html) package manager, as follows: From d32ed32e2b3659c34f353ed9e6941584626b5249 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 15 Dec 2023 09:06:08 +0000 Subject: [PATCH 25/30] pre-commit checks --- README.md | 5 + docs/index.md | 4 +- gtfs_skims/cli.py | 20 ++-- gtfs_skims/connectors.py | 176 ++++++++++++++++++------------------ gtfs_skims/graph.py | 166 ++++++++++++++++++---------------- gtfs_skims/preprocessing.py | 107 +++++++--------------- gtfs_skims/utils.py | 100 ++++---------------- gtfs_skims/variables.py | 28 +++--- requirements/base.txt | 5 +- tests/conftest.py | 12 +-- tests/test_cli.py | 29 +++--- tests/test_connectors.py | 61 +++++-------- tests/test_graph.py | 69 ++++++-------- tests/test_preprocessing.py | 42 ++++----- tests/test_utils.py | 20 ++-- 15 files changed, 359 insertions(+), 485 deletions(-) diff --git a/README.md b/README.md index d6ddf93..7753847 100755 --- a/README.md +++ b/README.md @@ -9,6 +9,11 @@ + +Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. +By applying appropriate simplifications on the GTFS dataset, the library is able to calculate such matrices at scale. +For example, it was possible to calculate an MSOA-to-MSOA matrix for England and Wales in ~1 hour (with a relatevile large machine). + ## Documentation For more detailed instructions, see our [documentation](https://arup-group.github.io/gtfs_skims/latest). diff --git a/docs/index.md b/docs/index.md index 50f4372..3909da0 100755 --- a/docs/index.md +++ b/docs/index.md @@ -2,4 +2,6 @@ # Argo -Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. 
By applying appropriate simplifications on the GTFS dataset, the library is able to calculate such matrices at scale: for example, it was possible to calculate an MSOA-to-MSOA matrix for England and Wales in ~1 hour (with a relatevile large machine). \ No newline at end of file +Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. +By applying appropriate simplifications on the GTFS dataset, the library is able to calculate such matrices at scale. +For example, it was possible to calculate an MSOA-to-MSOA matrix for England and Wales in ~1 hour (with a relatevile large machine). \ No newline at end of file diff --git a/gtfs_skims/cli.py b/gtfs_skims/cli.py index 1632f43..5c93373 100755 --- a/gtfs_skims/cli.py +++ b/gtfs_skims/cli.py @@ -1,11 +1,12 @@ """Console script for gtfs_skims.""" -import click from typing import Optional -from gtfs_skims.preprocessing import main as main_preprocessing +import click + from gtfs_skims.connectors import main as main_connectors from gtfs_skims.graph import main as main_graph +from gtfs_skims.preprocessing import main as main_preprocessing from gtfs_skims.utils import Config @@ -13,14 +14,12 @@ @click.group def cli(args=None): """Console script for gtfs_skims.""" - click.echo( - "Console script for Argo (gtfs_skims)." 
- ) + click.echo("Console script for Argo (gtfs_skims).") return 0 @cli.command() -@click.argument('config_path') +@click.argument("config_path") @click.option("--output_directory_override", default=None, help="override output directory") def run(config_path: str, output_directory_override: Optional[str] = None): config = Config.from_yaml(config_path) @@ -31,12 +30,11 @@ def run(config_path: str, output_directory_override: Optional[str] = None): gtfs_data = None connectors_data = None - if 'preprocessing' in steps: + if "preprocessing" in steps: gtfs_data = main_preprocessing(config=config) - if 'connectors' in steps: + if "connectors" in steps: connectors_data = main_connectors(config=config, data=gtfs_data) - if 'graph' in steps: - main_graph(config=config, gtfs_data=gtfs_data, - connectors_data=connectors_data) + if "graph" in steps: + main_graph(config=config, gtfs_data=gtfs_data, connectors_data=connectors_data) diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index a3536fd..b611756 100644 --- a/gtfs_skims/connectors.py +++ b/gtfs_skims/connectors.py @@ -1,14 +1,14 @@ from __future__ import annotations -from dataclasses import dataclass -from functools import cached_property + import os +from functools import cached_property from typing import Optional import numpy as np -from scipy.spatial import KDTree import pandas as pd +from scipy.spatial import KDTree -from gtfs_skims.utils import Config, GTFSData, ConnectorsData, get_logger +from gtfs_skims.utils import Config, ConnectorsData, GTFSData, get_logger from gtfs_skims.variables import DATA_TYPE @@ -27,13 +27,13 @@ def query_pairs(coords: np.ndarray, radius: float) -> np.array: ids = coords[:, 2].argsort() dtree = KDTree(coords[ids]) - connectors = dtree.query_pairs(r=radius, output_type='ndarray', p=2) + connectors = dtree.query_pairs(r=radius, output_type="ndarray", p=2) return ids[connectors] class TransferConnectors: - """ Manages transfer connectors. 
""" + """Manages transfer connectors.""" def __init__(self, coords: np.ndarray, max_transfer_distance: float) -> None: self.coords = coords @@ -65,12 +65,12 @@ def walk(self) -> np.array: Returns: np.array: Distance from origin to destination point (on the xy axis). """ - walk = ((self.dcoords[:, :2]-self.ocoords[:, :2])**2).sum(1)**0.5 + walk = ((self.dcoords[:, :2] - self.ocoords[:, :2]) ** 2).sum(1) ** 0.5 return walk @cached_property def wait(self) -> np.array: - """Wait distance. It is calculated as the difference between timestamps (dz) + """Wait distance. It is calculated as the difference between timestamps (dz) and the distance required to walk to the destination. Returns: @@ -106,7 +106,7 @@ def filter_feasible_transfer(self, maxdist: float) -> None: Args: maxdist (float): Maximum transfer distance (walk+wait) """ - is_feasible = (self.wait > 0) & ((self.walk+self.wait) <= maxdist) + is_feasible = (self.wait > 0) & ((self.walk + self.wait) <= maxdist) self.filter(is_feasible) def filter_max_walk(self, max_walk: float) -> None: @@ -115,7 +115,7 @@ def filter_max_walk(self, max_walk: float) -> None: Args: max_walk (float): Max walk distance """ - cond = (self.walk <= max_walk) + cond = self.walk <= max_walk self.filter(cond) def filter_max_wait(self, max_wait: float) -> None: @@ -132,9 +132,7 @@ def filter_same_route(self, routes: np.ndarray) -> None: Args: routes (np.array): Route IDs array. Its indexing matches the self.coords table. 
""" - self.filter( - routes[self.ods[:, 0]] != routes[self.ods[:, 1]] - ) + self.filter(routes[self.ods[:, 0]] != routes[self.ods[:, 1]]) def filter_nearest_service(self, services: np.ndarray) -> None: """If a service can be accessed from a origin through multiple stops, @@ -150,8 +148,8 @@ def filter_nearest_service(self, services: np.ndarray) -> None: idx_sorted = transfer.argsort() # create origin-service combinations - order_o = int(np.floor(np.log10(services.max()))+1) - comb = (self.ods[:, 0]+1) * 10**order_o + services_d + order_o = int(np.floor(np.log10(services.max())) + 1) + comb = (self.ods[:, 0] + 1) * 10**order_o + services_d # get first instance of each origin-service combination # (which corresponds to the most efficient transfer) @@ -162,9 +160,7 @@ def filter_nearest_service(self, services: np.ndarray) -> None: def query_pairs_od( - coords_origins: np.ndarray, - coords_destinations: np.ndarray, - radius: float + coords_origins: np.ndarray, coords_destinations: np.ndarray, radius: float ) -> np.array: """Get origin-destination pairs between points, within a radius. 
@@ -179,26 +175,24 @@ def query_pairs_od( tree_origins = KDTree(coords_origins) tree_destinations = KDTree(coords_destinations) - ods = tree_origins.query_ball_tree( - tree_destinations, r=radius) + ods = tree_origins.query_ball_tree(tree_destinations, r=radius) # flatten - ods = np.column_stack([ - np.repeat(range(len(coords_origins)), list(map(len, ods))), - np.concatenate(ods) - ]).astype(DATA_TYPE) + ods = np.column_stack( + [np.repeat(range(len(coords_origins)), list(map(len, ods))), np.concatenate(ods)] + ).astype(DATA_TYPE) return ods class AccessEgressConnectors(TransferConnectors): - """ Connections between zones/endpoints and stops """ + """Connections between zones/endpoints and stops""" def __init__( - self, - coords_origins: np.ndarray, - coords_destinations: np.ndarray, - max_transfer_distance: float + self, + coords_origins: np.ndarray, + coords_destinations: np.ndarray, + max_transfer_distance: float, ) -> None: self.coords_origins = coords_origins self.coords_destinations = coords_destinations @@ -207,8 +201,7 @@ def __init__( if coords_origins.shape[1] == 3: radius += max_transfer_distance * (2**0.5) - self.ods = query_pairs_od(coords_origins, coords_destinations, - radius=radius) + self.ods = query_pairs_od(coords_origins, coords_destinations, radius=radius) @cached_property def ocoords(self) -> np.array: @@ -239,14 +232,13 @@ def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: Returns: np.ndarray: [origin id, destination id, walk time, wait time] """ - time_to_distance = config.walk_speed/3.6 # km/hr to meters + time_to_distance = config.walk_speed / 3.6 # km/hr to meters max_transfer_distance = config.max_transfer_time * time_to_distance max_wait_distance = config.max_wait * time_to_distance # get candidate connectors - coords = data.stop_times[['x', 'y', 'departure_s']].values - coords[:, :2] = coords[:, :2] * \ - config.crows_fly_factor # crow's fly transformation + coords = data.stop_times[["x", "y", 
"departure_s"]].values + coords[:, :2] = coords[:, :2] * config.crows_fly_factor # crow's fly transformation tc = TransferConnectors(coords, max_transfer_distance) # apply more narrow filters: @@ -262,23 +254,26 @@ def get_transfer_connectors(data: GTFSData, config: Config) -> np.array: tc.filter_max_wait(max_wait_distance) # not same route - routes = data.stop_times['trip_id'].map( - data.trips.set_index('trip_id')['route_id'] - ).values + routes = data.stop_times["trip_id"].map(data.trips.set_index("trip_id")["route_id"]).values tc.filter_same_route(routes) # most efficient transfer to service - services = data.stop_times['trip_id'].map( - data.trips.set_index('trip_id')['service_id'] - ).values + services = data.stop_times["trip_id"].map(data.trips.set_index("trip_id")["service_id"]).values tc.filter_nearest_service(services) # construct array - arr = np.concatenate([ - tc.ods, # origin and destination index - (tc.walk/time_to_distance).reshape(-1, 1), # walk time (seconds) - (tc.wait/time_to_distance).reshape(-1, 1) # wait time (seconds) - ], axis=1).round(1).astype(DATA_TYPE) + arr = ( + np.concatenate( + [ + tc.ods, # origin and destination index + (tc.walk / time_to_distance).reshape(-1, 1), # walk time (seconds) + (tc.wait / time_to_distance).reshape(-1, 1), # wait time (seconds) + ], + axis=1, + ) + .round(1) + .astype(DATA_TYPE) + ) return arr @@ -289,25 +284,22 @@ def get_access_connectors(data: GTFSData, config: Config, origins: pd.DataFrame) Args: data (GTFSData): GTFS data object. config (Config): Config object. - destinations (pd.DataFrame): Origin coordinates dataframe. + destinations (pd.DataFrame): Origin coordinates dataframe. Must include 'x' and 'y' columns, providing the cartesian coordinates of the trip start points. 
Returns: np.ndarray: [origin id, destination id, walk time, wait time] """ - time_to_distance = config.walk_speed/3.6 # km/hr to meters + time_to_distance = config.walk_speed / 3.6 # km/hr to meters max_transfer_distance = config.max_transfer_time * time_to_distance max_wait_distance = config.max_wait * time_to_distance # get candidate connectors - coords_stops = data.stop_times[['x', 'y', 'departure_s']].values - coords_stops[:, :2] = coords_stops[:, :2] * \ - config.crows_fly_factor # crow's fly transformation - coords_origins = (origins[['x', 'y']] * config.crows_fly_factor).\ - assign(z=config.start_s).values + coords_stops = data.stop_times[["x", "y", "departure_s"]].values + coords_stops[:, :2] = coords_stops[:, :2] * config.crows_fly_factor # crow's fly transformation + coords_origins = (origins[["x", "y"]] * config.crows_fly_factor).assign(z=config.start_s).values - ac = AccessEgressConnectors( - coords_origins, coords_stops, max_transfer_distance) + ac = AccessEgressConnectors(coords_origins, coords_stops, max_transfer_distance) # more narrow filtering ac.filter_feasible_transfer(max_transfer_distance) @@ -316,11 +308,18 @@ def get_access_connectors(data: GTFSData, config: Config, origins: pd.DataFrame) if max_wait_distance < max_transfer_distance: ac.filter_max_wait(max_wait_distance) - arr = np.concatenate([ - ac.ods, # origin and destination index - (ac.walk/time_to_distance).reshape(-1, 1), # walk time (seconds) - (ac.wait/time_to_distance).reshape(-1, 1) # wait time (seconds) - ], axis=1).round(1).astype(DATA_TYPE) + arr = ( + np.concatenate( + [ + ac.ods, # origin and destination index + (ac.walk / time_to_distance).reshape(-1, 1), # walk time (seconds) + (ac.wait / time_to_distance).reshape(-1, 1), # wait time (seconds) + ], + axis=1, + ) + .round(1) + .astype(DATA_TYPE) + ) return arr @@ -331,29 +330,33 @@ def get_egress_connectors(data: GTFSData, config: Config, destinations: pd.DataF Args: data (GTFSData): GTFS data object. 
config (Config): Config object. - destinations (pd.DataFrame): Destination coordinates dataframe. + destinations (pd.DataFrame): Destination coordinates dataframe. Must include 'x' and 'y' columns, providing the cartesian coordinates of the trip ends. Returns: np.ndarray: [origin id, destination id, walk time, wait time] """ - time_to_distance = config.walk_speed/3.6 # km/hr to meters + time_to_distance = config.walk_speed / 3.6 # km/hr to meters # get candidate connectors - coords_stops = data.stop_times[['x', 'y']].values - coords_stops[:, :2] = coords_stops[:, :2] * \ - config.crows_fly_factor # crow's fly transformation - coords_destinations = ( - destinations[['x', 'y']] * config.crows_fly_factor).values - - ec = AccessEgressConnectors( - coords_stops, coords_destinations, config.walk_distance_threshold) - - arr = np.concatenate([ - ec.ods, # origin and destination index - (ec.walk/time_to_distance).reshape(-1, 1), # walk time (seconds) - np.array([0]*len(ec.ods)).reshape(-1, 1) # wait time = 0 - ], axis=1).round(1).astype(DATA_TYPE) + coords_stops = data.stop_times[["x", "y"]].values + coords_stops[:, :2] = coords_stops[:, :2] * config.crows_fly_factor # crow's fly transformation + coords_destinations = (destinations[["x", "y"]] * config.crows_fly_factor).values + + ec = AccessEgressConnectors(coords_stops, coords_destinations, config.walk_distance_threshold) + + arr = ( + np.concatenate( + [ + ec.ods, # origin and destination index + (ec.walk / time_to_distance).reshape(-1, 1), # walk time (seconds) + np.array([0] * len(ec.ods)).reshape(-1, 1), # wait time = 0 + ], + axis=1, + ) + .round(1) + .astype(DATA_TYPE) + ) return arr @@ -363,15 +366,14 @@ def main(config: Config, data: Optional[GTFSData] = None) -> ConnectorsData: Args: config (Config): Config object. - data (Optional[GTFSData], optional): GTFS data object. - If not provided, reads the stored parquet files from the outputs directory. + data (Optional[GTFSData], optional): GTFS data object. 
+ If not provided, reads the stored parquet files from the outputs directory. Defaults to None. Returns: ConnectorsData: Connectors object, holding the three output tables. """ - logger = get_logger(os.path.join( - config.path_outputs, 'log_connectors.log')) + logger = get_logger(os.path.join(config.path_outputs, "log_connectors.log")) if data is None: data = GTFSData.from_parquet(config.path_outputs) @@ -379,25 +381,25 @@ def main(config: Config, data: Optional[GTFSData] = None) -> ConnectorsData: destinations = pd.read_csv(config.path_destinations, index_col=0) # get feasible connections - logger.info('Getting transfer connectors...') + logger.info("Getting transfer connectors...") connectors_transfer = get_transfer_connectors(data, config) - logger.info('Getting access connectors...') + logger.info("Getting access connectors...") connectors_access = get_access_connectors(data, config, origins) - logger.info('Getting egress connectors...') + logger.info("Getting egress connectors...") connectors_egress = get_egress_connectors(data, config, destinations) # convert to dataframe - colnames = ['onode', 'dnode', 'walk', 'wait'] + colnames = ["onode", "dnode", "walk", "wait"] connectors_transfer = pd.DataFrame(connectors_transfer, columns=colnames) connectors_access = pd.DataFrame(connectors_access, columns=colnames) connectors_egress = pd.DataFrame(connectors_egress, columns=colnames) # offset IDs for endpoints - connectors_access['onode'] += len(data.stop_times) - connectors_egress['dnode'] += (len(data.stop_times)+len(origins)) + connectors_access["onode"] += len(data.stop_times) + connectors_egress["dnode"] += len(data.stop_times) + len(origins) # save - logger.info(f'Saving connectors to {config.path_outputs}...') + logger.info(f"Saving connectors to {config.path_outputs}...") connectors = ConnectorsData( connectors_transfer=connectors_transfer, connectors_access=connectors_access, diff --git a/gtfs_skims/graph.py b/gtfs_skims/graph.py index 1ed528e..3ce8af8 
100644 --- a/gtfs_skims/graph.py +++ b/gtfs_skims/graph.py @@ -1,14 +1,14 @@ -from functools import partial import multiprocessing import os +from functools import partial from typing import Optional -from graph_tool import Graph -from graph_tool.topology import shortest_distance import numpy as np import pandas as pd +from graph_tool import Graph +from graph_tool.topology import shortest_distance -from gtfs_skims.utils import Config, GTFSData, ConnectorsData, get_logger +from gtfs_skims.utils import Config, ConnectorsData, GTFSData, get_logger def get_ivt_edges(stop_times: pd.DataFrame) -> pd.DataFrame: @@ -21,15 +21,22 @@ def get_ivt_edges(stop_times: pd.DataFrame) -> pd.DataFrame: np.ndarray: [origin id, destination id, in-vehicle time] """ edges_ivt = pd.Series(range(len(stop_times))) - trip_id = stop_times.reset_index()['trip_id'] - departures = stop_times.reset_index()['departure_s'] - - edges_ivt = pd.concat([ - edges_ivt, - edges_ivt.groupby(trip_id).shift(-1), - departures.groupby(trip_id).shift(-1) - departures, - ], axis=1).dropna().map(int) - edges_ivt.columns = ['onode', 'dnode', 'ivt'] + trip_id = stop_times.reset_index()["trip_id"] + departures = stop_times.reset_index()["departure_s"] + + edges_ivt = ( + pd.concat( + [ + edges_ivt, + edges_ivt.groupby(trip_id).shift(-1), + departures.groupby(trip_id).shift(-1) - departures, + ], + axis=1, + ) + .dropna() + .map(int) + ) + edges_ivt.columns = ["onode", "dnode", "ivt"] return edges_ivt @@ -44,12 +51,19 @@ def get_all_edges(gtfs_data: GTFSData, connectors_data: ConnectorsData) -> pd.Da Returns: pd.DataFrame: ['onode', 'dnode', 'ivt', 'walk', 'wait', 'transfer'] """ - edges = pd.concat([ - get_ivt_edges(gtfs_data.stop_times), - connectors_data.connectors_transfer.assign(transfer=1), - connectors_data.connectors_access, - connectors_data.connectors_egress, - ], axis=0).fillna(0).map(int) + edges = ( + pd.concat( + [ + get_ivt_edges(gtfs_data.stop_times), + 
connectors_data.connectors_transfer.assign(transfer=1), + connectors_data.connectors_access, + connectors_data.connectors_egress, + ], + axis=0, + ) + .fillna(0) + .map(int) + ) return edges @@ -58,28 +72,27 @@ def add_gc(edges: pd.DataFrame, config: Config) -> pd.DataFrame: """Calculate generalised time and add it as a column to the 'edges' table. Args: - edges (pd.DataFrame): Edges dataframe. Should include these columns: + edges (pd.DataFrame): Edges dataframe. Should include these columns: ['ivt', 'walk', 'wait', 'transfer'] config (Config): Config object. Returns: pd.DataFrame: Edges dataframe, with the generalised time ("gc") column included. """ - edges['gc'] = edges['ivt'] +\ - edges['walk'] * config.weight_walk +\ - edges['wait'] * config.weight_wait +\ - edges['transfer'] * config.penalty_interchange + edges["gc"] = ( + edges["ivt"] + + edges["walk"] * config.weight_walk + + edges["wait"] * config.weight_wait + + edges["transfer"] * config.penalty_interchange + ) # adding unweighted time as well - edges['time'] = edges[['ivt', 'walk', 'wait']].sum(1) + edges["time"] = edges[["ivt", "walk", "wait"]].sum(1) return edges -def build_graph( - edges: pd.DataFrame, - vars=['ivt', 'walk', 'wait', 'time', 'gc'] -) -> Graph: +def build_graph(edges: pd.DataFrame, vars=["ivt", "walk", "wait", "time", "gc"]) -> Graph: """Build a network graph from the edges table. 
Args: @@ -89,21 +102,17 @@ def build_graph( Returns: Graph: Connected GTFS graph """ - eprops = [(x, 'int') for x in vars] - g = Graph( - edges[['onode', 'dnode']+vars].values, - hashed=False, - eprops=eprops - ) + eprops = [(x, "int") for x in vars] + g = Graph(edges[["onode", "dnode"] + vars].values, hashed=False, eprops=eprops) return g def get_shortest_distances_single( - graph: Graph, - onode: int, - dnodes: list[int], - max_dist: Optional[float] = None, - attribute: str = 'gc' + graph: Graph, + onode: int, + dnodes: list[int], + max_dist: Optional[float] = None, + attribute: str = "gc", ) -> np.ndarray: """Get shortest distances from a single origin. @@ -117,20 +126,26 @@ def get_shortest_distances_single( Returns: np.ndarray: Shortest distances. The first value is the source node. """ - d = shortest_distance(graph, onode, dnodes, - weights=graph.edge_properties[attribute], dense=False, - max_dist=max_dist, directed=True) + d = shortest_distance( + graph, + onode, + dnodes, + weights=graph.edge_properties[attribute], + dense=False, + max_dist=max_dist, + directed=True, + ) d = np.concatenate([np.array([onode]), d]) return d def get_shortest_distances( - graph: Graph, - onodes: list[int], - dnodes: list[int], - max_dist: Optional[float] = None, - attribute: str = 'gc' + graph: Graph, + onodes: list[int], + dnodes: list[int], + max_dist: Optional[float] = None, + attribute: str = "gc", ) -> pd.DataFrame: """Get shortest distances from a set of origins to a set of destinations. @@ -147,8 +162,9 @@ def get_shortest_distances( and the column indices are the destination nodes. 
""" n_cpus = multiprocessing.cpu_count() - 1 - dist_wrapper = partial(get_shortest_distances_single, graph, dnodes=dnodes, - max_dist=max_dist, attribute=attribute) + dist_wrapper = partial( + get_shortest_distances_single, graph, dnodes=dnodes, max_dist=max_dist, attribute=attribute + ) with multiprocessing.Pool(n_cpus) as pool_obj: dists = pool_obj.map(dist_wrapper, onodes) @@ -165,13 +181,12 @@ def get_shortest_distances( def main( config: Config, gtfs_data: Optional[GTFSData] = None, - connectors_data: Optional[ConnectorsData] = None + connectors_data: Optional[ConnectorsData] = None, ) -> pd.DataFrame: # read - logger = get_logger(os.path.join( - config.path_outputs, 'log_graph.log')) + logger = get_logger(os.path.join(config.path_outputs, "log_graph.log")) - logger.info('Reading files...') + logger.info("Reading files...") if gtfs_data is None: gtfs_data = GTFSData.from_parquet(path=config.path_outputs) if connectors_data is None: @@ -180,47 +195,40 @@ def main( destinations = pd.read_csv(config.path_destinations, index_col=0) # graph - logger.info('Building graph...') + logger.info("Building graph...") edges = get_all_edges(gtfs_data, connectors_data) edges = add_gc(edges=edges, config=config) g = build_graph(edges=edges) # shortest paths - logger.info('Calculating shortest distances...') - origins['idx'] = range(len(origins)) - origins['idx'] += len(gtfs_data.stop_times) - destinations['idx'] = range(len(destinations)) - destinations['idx'] += (len(gtfs_data.stop_times)+len(origins)) - - onodes_scope = list(origins[origins['idx'].isin(edges['onode'])]['idx']) - dnodes_scope = list( - destinations[destinations['idx'].isin(edges['dnode'])]['idx']) + logger.info("Calculating shortest distances...") + origins["idx"] = range(len(origins)) + origins["idx"] += len(gtfs_data.stop_times) + destinations["idx"] = range(len(destinations)) + destinations["idx"] += len(gtfs_data.stop_times) + len(origins) + + onodes_scope = 
list(origins[origins["idx"].isin(edges["onode"])]["idx"]) + dnodes_scope = list(destinations[destinations["idx"].isin(edges["dnode"])]["idx"]) maxdist = config.end_s - config.start_s - distmat = get_shortest_distances( - g, onodes=onodes_scope, dnodes=dnodes_scope, - max_dist=maxdist) + distmat = get_shortest_distances(g, onodes=onodes_scope, dnodes=dnodes_scope, max_dist=maxdist) # expand to the full OD space - distmat_full = pd.DataFrame( - np.inf, index=origins['idx'], columns=destinations['idx']) + distmat_full = pd.DataFrame(np.inf, index=origins["idx"], columns=destinations["idx"]) distmat_full.loc[distmat.index, distmat.columns] = distmat.values # map labels - distmat_full.index = distmat_full.index.map( - origins.reset_index().set_index('idx')['name'] - ) + distmat_full.index = distmat_full.index.map(origins.reset_index().set_index("idx")["name"]) distmat_full.columns = distmat_full.columns.map( - destinations.reset_index().set_index('idx')['name'] + destinations.reset_index().set_index("idx")["name"] ) # infill intra_zonal - distmat_full = distmat_full.\ - apply(lambda x: np.where(x.name == x.index, np.nan, x), axis=0) - distmat_full = distmat_full.map(lambda x: np.where(x>=maxdist, np.inf, x)) + distmat_full = distmat_full.apply(lambda x: np.where(x.name == x.index, np.nan, x), axis=0) + distmat_full = distmat_full.map(lambda x: np.where(x >= maxdist, np.inf, x)) # save - path = os.path.join(config.path_outputs, 'skims.parquet.gzip') - logger.info(f'Saving results to {path}...') - distmat_full.to_parquet(path, compression='gzip', index=True) + path = os.path.join(config.path_outputs, "skims.parquet.gzip") + logger.info(f"Saving results to {path}...") + distmat_full.to_parquet(path, compression="gzip", index=True) return distmat_full diff --git a/gtfs_skims/preprocessing.py b/gtfs_skims/preprocessing.py index 419e82e..0299da6 100644 --- a/gtfs_skims/preprocessing.py +++ b/gtfs_skims/preprocessing.py @@ -2,8 +2,7 @@ import pyproj -from gtfs_skims.utils 
import ( - GTFSData, Config, get_weekday, ts_to_sec, get_logger) +from gtfs_skims.utils import Config, GTFSData, get_logger, get_weekday, ts_to_sec def filter_day(data: GTFSData, date: int) -> None: @@ -15,34 +14,18 @@ def filter_day(data: GTFSData, date: int) -> None: """ weekday = get_weekday(date) data.calendar = data.calendar[ - (data.calendar['start_date'] <= date) & - (data.calendar['end_date'] >= date) & - (data.calendar[weekday] == 1) + (data.calendar["start_date"] <= date) + & (data.calendar["end_date"] >= date) + & (data.calendar[weekday] == 1) ] - data.trips = data.trips[ - data.trips['service_id'].isin( - set(data.calendar['service_id']) - ) - ] + data.trips = data.trips[data.trips["service_id"].isin(set(data.calendar["service_id"]))] - data.routes = data.routes[ - data.routes['route_id'].isin( - set(data.trips['route_id']) - ) - ] + data.routes = data.routes[data.routes["route_id"].isin(set(data.trips["route_id"]))] - data.stop_times = data.stop_times[ - data.stop_times['trip_id'].isin( - set(data.trips['trip_id']) - ) - ] + data.stop_times = data.stop_times[data.stop_times["trip_id"].isin(set(data.trips["trip_id"]))] - data.stops = data.stops[ - data.stops['stop_id'].isin( - set(data.stop_times['stop_id']) - ) - ] + data.stops = data.stops[data.stops["stop_id"].isin(set(data.stop_times["stop_id"]))] def filter_time(data: GTFSData, start_time: int, end_time: int) -> None: @@ -54,29 +37,20 @@ def filter_time(data: GTFSData, start_time: int, end_time: int) -> None: end_time (int): End of the time window (seconds from midnight) """ # filter stop times - data.stop_times['departure_s'] = data.stop_times['departure_time'].apply( - ts_to_sec) - data.stop_times['arrival_s'] = data.stop_times['arrival_time'].apply( - ts_to_sec) + data.stop_times["departure_s"] = data.stop_times["departure_time"].apply(ts_to_sec) + data.stop_times["arrival_s"] = data.stop_times["arrival_time"].apply(ts_to_sec) data.stop_times = data.stop_times[ - (data.stop_times['arrival_s'] >= 
start_time) & - (data.stop_times['departure_s'] <= end_time) + (data.stop_times["arrival_s"] >= start_time) & (data.stop_times["departure_s"] <= end_time) ] # filter stops - data.stops = data.stops[data.stops['stop_id'].isin( - set(data.stop_times['stop_id']) - )] + data.stops = data.stops[data.stops["stop_id"].isin(set(data.stop_times["stop_id"]))] # filter trips - data.trips = data.trips[data.trips['trip_id'].isin( - set(data.stop_times['trip_id']) - )] + data.trips = data.trips[data.trips["trip_id"].isin(set(data.stop_times["trip_id"]))] # filter routes - data.routes = data.routes[data.routes['route_id'].isin( - set(data.trips['route_id']) - )] + data.routes = data.routes[data.routes["route_id"].isin(set(data.trips["route_id"]))] def add_coordinates(data: GTFSData, epsg: int = 27700) -> None: @@ -87,22 +61,18 @@ def add_coordinates(data: GTFSData, epsg: int = 27700) -> None: epsg (int): The target coordinate system """ transformer = pyproj.Transformer.from_crs( - pyproj.transformer.CRS('epsg:4326'), - pyproj.transformer.CRS(f'epsg:{epsg}'), always_xy=True) + pyproj.transformer.CRS("epsg:4326"), pyproj.transformer.CRS(f"epsg:{epsg}"), always_xy=True + ) - data.stops['x'], data.stops['y'] = transformer.transform( - data.stops['stop_lon'], data.stops['stop_lat'] + data.stops["x"], data.stops["y"] = transformer.transform( + data.stops["stop_lon"], data.stops["stop_lat"] ) - data.stops['x'] = data.stops['x'].round().map(int) - data.stops['y'] = data.stops['y'].round().map(int) + data.stops["x"] = data.stops["x"].round().map(int) + data.stops["y"] = data.stops["y"].round().map(int) - data.stop_times['x'] = data.stop_times['stop_id'].map( - data.stops.set_index('stop_id')['x'] - ) - data.stop_times['y'] = data.stop_times['stop_id'].map( - data.stops.set_index('stop_id')['y'] - ) + data.stop_times["x"] = data.stop_times["stop_id"].map(data.stops.set_index("stop_id")["x"]) + data.stop_times["y"] = data.stop_times["stop_id"].map(data.stops.set_index("stop_id")["y"]) def 
filter_bounding_box(data: GTFSData, xmin: int, xmax: int, ymin: int, ymax: int) -> None: @@ -116,28 +86,22 @@ def filter_bounding_box(data: GTFSData, xmin: int, xmax: int, ymin: int, ymax: i ymax (int): Max Northing """ data.stops = data.stops[ - (data.stops['x'] >= xmin) & - (data.stops['x'] <= xmax) & - (data.stops['y'] >= ymin) & - (data.stops['y'] <= ymax) + (data.stops["x"] >= xmin) + & (data.stops["x"] <= xmax) + & (data.stops["y"] >= ymin) + & (data.stops["y"] <= ymax) ] # filter stop times data.stop_times = data.stop_times[ - data.stop_times['stop_id'].isin( - set(list(data.stops['stop_id'])) - ) + data.stop_times["stop_id"].isin(set(list(data.stops["stop_id"]))) ] # filter trips - data.trips = data.trips[data.trips['trip_id'].isin( - set(data.stop_times['trip_id']) - )] + data.trips = data.trips[data.trips["trip_id"].isin(set(data.stop_times["trip_id"]))] # filter routes - data.routes = data.routes[data.routes['route_id'].isin( - set(data.trips['route_id']) - )] + data.routes = data.routes[data.routes["route_id"].isin(set(data.trips["route_id"]))] def main(config: Config) -> GTFSData: @@ -149,24 +113,23 @@ def main(config: Config) -> GTFSData: Returns: GTFSData: Pre-processed GTFS data object. 
""" - logger = get_logger(os.path.join( - config.path_outputs, 'log_preprocessing.log')) + logger = get_logger(os.path.join(config.path_outputs, "log_preprocessing.log")) - logger.info('Reading files...') + logger.info("Reading files...") data = GTFSData.from_gtfs(path_gtfs=config.path_gtfs) - logger.info('Time filtering..') + logger.info("Time filtering..") filter_day(data, config.calendar_date) filter_time(data, config.start_s, config.end_s) add_coordinates(data, epsg=config.epsg_centroids) if config.bounding_box is not None: - logger.info('Cropping to bounding box..') + logger.info("Cropping to bounding box..") filter_bounding_box(data, **config.bounding_box) - logger.info(f'Saving outputs at {config.path_outputs}') + logger.info(f"Saving outputs at {config.path_outputs}") data.save(config.path_outputs) - logger.info(f'Preprocessing complete.') + logger.info("Preprocessing complete.") return data diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py index 2f28821..4c9dda5 100644 --- a/gtfs_skims/utils.py +++ b/gtfs_skims/utils.py @@ -1,14 +1,15 @@ from __future__ import annotations -from dataclasses import dataclass -from datetime import datetime + import logging import os +from dataclasses import dataclass +from datetime import datetime from pathlib import Path from typing import Optional -import yaml from zipfile import ZipFile import pandas as pd +import yaml def ts_to_sec(x: str) -> int: @@ -20,8 +21,8 @@ def ts_to_sec(x: str) -> int: Returns: int: Seconds from midnight """ - s = [int(i) for i in x.split(':')] - return 3600*s[0]+60*s[1]+s[2] + s = [int(i) for i in x.split(":")] + return 3600 * s[0] + 60 * s[1] + s[2] def get_weekday(date: int) -> str: @@ -33,8 +34,8 @@ def get_weekday(date: int) -> str: Returns: str: Day name """ - weekday = datetime.strptime(str(date), '%Y%m%d') - weekday = datetime.strftime(weekday, '%A').lower() + weekday = datetime.strptime(str(date), "%Y%m%d") + weekday = datetime.strftime(weekday, "%A").lower() return weekday @@ -50,8 
+51,7 @@ def get_logger(path_output: Optional[str] = None) -> logging.Logger: logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) handler = logging.StreamHandler() - formatter = logging.Formatter( - '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") handler.setFormatter(formatter) if len(logger.handlers) == 0: logger.addHandler(handler) @@ -63,7 +63,7 @@ def get_logger(path_output: Optional[str] = None) -> logging.Logger: if not os.path.exists(parent_dir): os.makedirs(parent_dir) - file_handler = logging.FileHandler(path_output, mode='w') + file_handler = logging.FileHandler(path_output, mode="w") file_handler.setFormatter(formatter) logger.addHandler(file_handler) @@ -104,6 +104,7 @@ class Config: ``` """ + path_gtfs: str path_outputs: str path_origins: str @@ -133,82 +134,23 @@ def from_yaml(cls, path: str) -> Config: Returns: Config: Config object """ - with open(path, 'r') as f: + with open(path, "r") as f: config = yaml.safe_load(f) - config_flat = { - **config['paths'], - **config['settings'], - 'steps': config['steps'] - } + config_flat = {**config["paths"], **config["settings"], "steps": config["steps"]} return cls(**config_flat) def __repr__(self) -> str: - s = 'Config file\n' - s += '-'*50 + '\n' + s = "Config file\n" + s += "-" * 50 + "\n" s += yaml.dump(self.__dict__) return s -# @dataclass -# class GTFSData: -# calendar: pd.DataFrame -# routes: pd.DataFrame -# stops: pd.DataFrame -# stop_times: pd.DataFrame -# trips: pd.DataFrame - -# @classmethod -# def from_gtfs(cls, path_gtfs: str) -> GTFSData: -# """Load GTFS tables from a standard zipped GTFS file. - -# Args: -# path_gtfs (str): Path to a zipped GTFS dataset. - -# Returns: -# GTFSData: GTFS data object. 
-# """ -# data = {} -# with ZipFile(path_gtfs, 'r') as zf: -# for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: -# with zf.open(f'{name}.txt') as f: -# data[name] = pd.read_csv(f, low_memory=False) -# return cls(**data) - -# @classmethod -# def from_parquet(cls, path: str) -> GTFSData: -# """Construct class from pre-processed GTFS tables in Parquet format. - -# Args: -# path (str): Path to tables. - -# Returns: -# GTFSData: GTFS data object. -# """ -# data = {} -# for name in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: -# data[name] = pd.read_parquet( -# os.path.join(path, f'{name}.parquet.gzip')) -# return cls(**data) - -# def save(self, path_outputs: str) -> None: -# """Export all tables in zipped parquet format. - -# Args: -# path_outputs (str): Directory to save outputs. -# """ -# if not os.path.exists(path_outputs): -# os.makedirs(path_outputs) - -# for k, v in self.__dict__.items(): -# v.to_parquet(os.path.join( -# path_outputs, f'{k}.parquet.gzip'), compression='gzip') - - @dataclass class Data: @classmethod def from_gtfs(cls, path_gtfs: str) -> Data: - """Load GTFS tables from a standard zipped GTFS file. + """Load GTFS tables from a standard zipped GTFS file. Args: path_gtfs (str): Path to a zipped GTFS dataset. @@ -217,9 +159,9 @@ def from_gtfs(cls, path_gtfs: str) -> Data: GTFSData: GTFS data object. 
""" data = {} - with ZipFile(path_gtfs, 'r') as zf: + with ZipFile(path_gtfs, "r") as zf: for name in cls.__annotations__.keys(): - with zf.open(f'{name}.txt') as f: + with zf.open(f"{name}.txt") as f: data[name] = pd.read_csv(f, low_memory=False) return cls(**data) @@ -235,8 +177,7 @@ def from_parquet(cls, path: str) -> Data: """ data = {} for name in cls.__annotations__.keys(): - data[name] = pd.read_parquet( - os.path.join(path, f'{name}.parquet.gzip')) + data[name] = pd.read_parquet(os.path.join(path, f"{name}.parquet.gzip")) return cls(**data) def save(self, path_outputs: str) -> None: @@ -249,8 +190,7 @@ def save(self, path_outputs: str) -> None: os.makedirs(path_outputs) for k, v in self.__dict__.items(): - v.to_parquet(os.path.join( - path_outputs, f'{k}.parquet.gzip'), compression='gzip') + v.to_parquet(os.path.join(path_outputs, f"{k}.parquet.gzip"), compression="gzip") @dataclass diff --git a/gtfs_skims/variables.py b/gtfs_skims/variables.py index c6ded4e..fd70251 100644 --- a/gtfs_skims/variables.py +++ b/gtfs_skims/variables.py @@ -6,17 +6,17 @@ # source: https://developers.google.com/transit/gtfs/reference#routestxt # and https://developers.google.com/transit/gtfs/reference/extended-route-types ROUTE_TYPES = { - 0: 'tram', # Tram, Streetcar, Light rail. - 1: 'underground', # Subway, Metro. - 2: 'rail', # Rail. Used for intercity or long-distance travel. - 3: 'bus', # Bus. Used for short- and long-distance bus routes. - 4: 'ferry', # Ferry. Used for short- and long-distance boat service. - 5: 'cable', - 6: 'cable aerial', - 7: 'furnicular', # Funicular. Any rail system designed for steep inclines. - 11: 'trolley', # Trolleybus. - 12: 'monorail', # Monorail. - 200: 'coach', # Coach Service - 401: 'undergound', # Metro Service - 402: 'underground', # Underground Service -} \ No newline at end of file + 0: "tram", # Tram, Streetcar, Light rail. + 1: "underground", # Subway, Metro. + 2: "rail", # Rail. Used for intercity or long-distance travel. 
+ 3: "bus", # Bus. Used for short- and long-distance bus routes. + 4: "ferry", # Ferry. Used for short- and long-distance boat service. + 5: "cable", + 6: "cable aerial", + 7: "furnicular", # Funicular. Any rail system designed for steep inclines. + 11: "trolley", # Trolleybus. + 12: "monorail", # Monorail. + 200: "coach", # Coach Service + 401: "undergound", # Metro Service + 402: "underground", # Underground Service +} diff --git a/requirements/base.txt b/requirements/base.txt index b1ccdb9..4c3de05 100755 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,8 +1,9 @@ -# this dependency exists so that the base file is not empty -# it was chosen since it is a dependency that is included in any python environment already +click fastparquet graph-tool numpy pandas pyproj +pyyaml +yaml zipp \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 824ee6e..b2486e7 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,9 +12,9 @@ def test_content(response): import pytest -from gtfs_skims.utils import Config, GTFSData, ConnectorsData +from gtfs_skims.utils import Config, ConnectorsData, GTFSData -TEST_DATA_DIR = os.path.join(Path(__file__).parent, 'test_data') +TEST_DATA_DIR = os.path.join(Path(__file__).parent, "test_data") @pytest.fixture @@ -29,19 +29,19 @@ def response(): @pytest.fixture def config(): - return Config.from_yaml(os.path.join(TEST_DATA_DIR, 'config_demo.yaml')) + return Config.from_yaml(os.path.join(TEST_DATA_DIR, "config_demo.yaml")) @pytest.fixture def gtfs_data(): - return GTFSData.from_gtfs(os.path.join(TEST_DATA_DIR, 'iow-bus-gtfs.zip')) + return GTFSData.from_gtfs(os.path.join(TEST_DATA_DIR, "iow-bus-gtfs.zip")) @pytest.fixture def gtfs_data_preprocessed(): - return GTFSData.from_parquet(os.path.join(TEST_DATA_DIR, 'outputs')) + return GTFSData.from_parquet(os.path.join(TEST_DATA_DIR, "outputs")) @pytest.fixture def connectors_data(): - return ConnectorsData.from_parquet(os.path.join(TEST_DATA_DIR, 
'outputs')) + return ConnectorsData.from_parquet(os.path.join(TEST_DATA_DIR, "outputs")) diff --git a/tests/test_cli.py b/tests/test_cli.py index a0aa408..9e61d4a 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -6,7 +6,7 @@ from gtfs_skims import cli -TEST_DATA_DIR = os.path.join(Path(__file__).parent, 'test_data') +TEST_DATA_DIR = os.path.join(Path(__file__).parent, "test_data") def test_command_line_interface(): @@ -19,8 +19,7 @@ def test_command_line_interface(): assert ( "Console script for gtfs_skims.\n\nOptions:\n " "--version Show the version and exit.\n " - "--help Show this message and exit.\n" - in help_result.output + "--help Show this message and exit.\n" in help_result.output ) @@ -28,22 +27,20 @@ def test_run_steps_saves_outputs(tmpdir): runner = CliRunner() result = runner.invoke( cli.cli, - ['run', os.path.join(TEST_DATA_DIR, 'config_demo.yaml'), - '--output_directory_override', tmpdir] + [ + "run", + os.path.join(TEST_DATA_DIR, "config_demo.yaml"), + "--output_directory_override", + tmpdir, + ], ) assert result.exit_code == 0 - for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: - assert os.path.exists( - os.path.join(tmpdir, f'{x}.parquet.gzip') - ) + for x in ["calendar", "routes", "stops", "stop_times", "trips"]: + assert os.path.exists(os.path.join(tmpdir, f"{x}.parquet.gzip")) - for x in ['transfer', 'access', 'egress']: - assert os.path.exists( - os.path.join(tmpdir, f'connectors_{x}.parquet.gzip') - ) + for x in ["transfer", "access", "egress"]: + assert os.path.exists(os.path.join(tmpdir, f"connectors_{x}.parquet.gzip")) - assert os.path.exists( - os.path.join(tmpdir, f'skims.parquet.gzip') - ) + assert os.path.exists(os.path.join(tmpdir, "skims.parquet.gzip")) diff --git a/tests/test_connectors.py b/tests/test_connectors.py index f47fe9a..e18b27f 100644 --- a/tests/test_connectors.py +++ b/tests/test_connectors.py @@ -1,12 +1,11 @@ -from collections import defaultdict import itertools import os +from collections 
import defaultdict import numpy as np import pytest -import unittest.mock as mock -from gtfs_skims import connectors, preprocessing +from gtfs_skims import connectors @pytest.fixture() @@ -28,17 +27,17 @@ def find_index(coords, x, y, z): def get_valid_points(coords, source_idx, max_trasfer_dist): dcoords = coords - coords[source_idx] - walk = (dcoords[:, :2]**2).sum(1)**0.5 # euclidean distance on xy + walk = (dcoords[:, :2] ** 2).sum(1) ** 0.5 # euclidean distance on xy wait = dcoords[:, 2] - walk - is_valid = (wait > 0) & ((walk+wait) <= max_trasfer_dist) + is_valid = (wait > 0) & ((walk + wait) <= max_trasfer_dist) return is_valid -@pytest.mark.parametrize('source', [(0, 0, 0), (2.5, 2.5, 2.5), (-2.5, 0, 2.5)]) +@pytest.mark.parametrize("source", [(0, 0, 0), (2.5, 2.5, 2.5), (-2.5, 0, 2.5)]) def test_query_all_valid_included(points, source): - """ All valid points are included in the query results """ + """All valid points are included in the query results""" source_idx = find_index(points, *source) maxdist = 10 is_valid = get_valid_points(points, source_idx, maxdist) @@ -50,9 +49,9 @@ def test_query_all_valid_included(points, source): assert is_valid[dest].sum() == is_valid.sum() -@pytest.mark.parametrize('source', [(0, 0, 0), (2.5, 2.5, 2.5), (-2.5, 0, 2.5)]) +@pytest.mark.parametrize("source", [(0, 0, 0), (2.5, 2.5, 2.5), (-2.5, 0, 2.5)]) def test_query_all_included_valid(points, source): - """ All results from the query are valid """ + """All results from the query are valid""" source_idx = find_index(points, *source) maxdist = 10 is_valid = get_valid_points(points, source_idx, maxdist) @@ -91,15 +90,13 @@ def get_o_service_transfers(conn, services_d): transfer_times = conn.wait + conn.walk d = defaultdict(list) for i in range(len(services_d)): - d[(conn.ods[i, 0], services_d[i]) - ].append(transfer_times[i]) + d[(conn.ods[i, 0], services_d[i])].append(transfer_times[i]) return d def test_filter_nearest_service(transfer_connectors): np.random.seed(0) - 
services = np.random.randint( - 0, 2, size=transfer_connectors.coords.shape[0]) + services = np.random.randint(0, 2, size=transfer_connectors.coords.shape[0]) services_d = services[transfer_connectors.ods[:, 1]] # for every origin-service pair there are multiple connections @@ -131,36 +128,24 @@ def test_get_transfer_array(gtfs_data_preprocessed, config): def test_get_od_pairs(): ods = connectors.query_pairs_od( - np.array([[0, 0], [1, 1]]), - np.array([[0.5, 0.5], [2, 1], [2, 2]]), - radius=1 + np.array([[0, 0], [1, 1]]), np.array([[0.5, 0.5], [2, 1], [2, 2]]), radius=1 ) - expected = np.array([ - [0, 0], - [1, 0], - [1, 1] - ]) + expected = np.array([[0, 0], [1, 0], [1, 1]]) np.testing.assert_equal(ods, expected) def test_get_od_walk(): egress = connectors.AccessEgressConnectors( - np.array([[0, 0], [1, 1]]), - np.array([[0.5, 0.5], [2, 1], [2, 2]]), - max_transfer_distance=1 + np.array([[0, 0], [1, 1]]), np.array([[0.5, 0.5], [2, 1], [2, 2]]), max_transfer_distance=1 ) walk = egress.walk - expected = np.array([ - (2*0.5**2)**0.5, (2*0.5**2)**0.5, 1 - ]) + expected = np.array([(2 * 0.5**2) ** 0.5, (2 * 0.5**2) ** 0.5, 1]) np.testing.assert_almost_equal(walk, expected) def test_convert_distance_3d(): access = connectors.AccessEgressConnectors( - np.array([[0, 0, 0]]), - np.array([[1, 1, 1]]), - max_transfer_distance=1 + np.array([[0, 0, 0]]), np.array([[1, 1, 1]]), max_transfer_distance=1 ) assert len(access.ods) == 1 # radius has been adjusted to 3D space @@ -185,17 +170,15 @@ def test_indices_are_offset(config, gtfs_data_preprocessed, tmpdir): config.path_outputs = tmpdir conn = connectors.main(config=config, data=gtfs_data_preprocessed) stop_time_ids = list(range(len(gtfs_data_preprocessed.stop_times))) - assert all(np.isin(conn.connectors_access['dnode'], stop_time_ids)) - assert all(np.isin(conn.connectors_egress['onode'], stop_time_ids)) - assert np.isin(conn.connectors_access['onode'], stop_time_ids).sum() == 0 - assert 
np.isin(conn.connectors_egress['dnode'], stop_time_ids).sum() == 0 - assert conn.connectors_access['onode'].max() < conn.connectors_egress['dnode'].min() + assert all(np.isin(conn.connectors_access["dnode"], stop_time_ids)) + assert all(np.isin(conn.connectors_egress["onode"], stop_time_ids)) + assert np.isin(conn.connectors_access["onode"], stop_time_ids).sum() == 0 + assert np.isin(conn.connectors_egress["dnode"], stop_time_ids).sum() == 0 + assert conn.connectors_access["onode"].max() < conn.connectors_egress["dnode"].min() def test_main_saves_outputs(config, gtfs_data_preprocessed, tmpdir): config.path_outputs = tmpdir connectors.main(config=config, data=gtfs_data_preprocessed) - for x in ['transfer', 'access', 'egress']: - assert os.path.exists( - os.path.join(tmpdir, f'connectors_{x}.parquet.gzip') - ) + for x in ["transfer", "access", "egress"]: + assert os.path.exists(os.path.join(tmpdir, f"connectors_{x}.parquet.gzip")) diff --git a/tests/test_graph.py b/tests/test_graph.py index 6667e90..009daea 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -1,10 +1,9 @@ -import os +from unittest.mock import Mock -from graph_tool import Graph import numpy as np import pandas as pd import pytest -from unittest.mock import Mock +from graph_tool import Graph from gtfs_skims import graph @@ -20,34 +19,26 @@ def mock_config(mocker): @pytest.fixture() def small_graph() -> Graph: - edges = pd.DataFrame({ - 'onode': [0, 0, 1, 2], - 'dnode': [1, 2, 3, 3], - 'gc': [10, 20, 15, 4] - }) - return graph.build_graph(edges, vars=['gc']) + edges = pd.DataFrame({"onode": [0, 0, 1, 2], "dnode": [1, 2, 3, 3], "gc": [10, 20, 15, 4]}) + return graph.build_graph(edges, vars=["gc"]) @pytest.fixture() def small_graph_birectional() -> Graph: - edges = pd.DataFrame({ - 'onode': [0, 0, 1, 2, 1, 2, 3, 3], - 'dnode': [1, 2, 3, 3, 0, 0, 1, 2], - 'gc': [10, 20, 15, 4, 10, 20, 15, 4] - }) - return graph.build_graph(edges, vars=['gc']) + edges = pd.DataFrame( + { + "onode": [0, 0, 1, 2, 1, 
2, 3, 3], + "dnode": [1, 2, 3, 3, 0, 0, 1, 2], + "gc": [10, 20, 15, 4, 10, 20, 15, 4], + } + ) + return graph.build_graph(edges, vars=["gc"]) def test_get_ivt_times(): - stop_times = pd.DataFrame({ - 'trip_id': [0, 1, 0, 1], - 'departure_s': [100, 105, 120, 150] - }) + stop_times = pd.DataFrame({"trip_id": [0, 1, 0, 1], "departure_s": [100, 105, 120, 150]}) ivt_edges = graph.get_ivt_edges(stop_times) - expected = np.array([ - [0, 2, 20], - [1, 3, 45] - ]) + expected = np.array([[0, 2, 20], [1, 3, 45]]) np.testing.assert_equal(ivt_edges.values, expected) @@ -55,27 +46,23 @@ def test_get_all_edges(gtfs_data_preprocessed, connectors_data): edges = graph.get_all_edges(gtfs_data_preprocessed, connectors_data) # all connections are included - len_expected = len(gtfs_data_preprocessed.stop_times) - \ - gtfs_data_preprocessed.stop_times['trip_id'].nunique() + len_expected = ( + len(gtfs_data_preprocessed.stop_times) + - gtfs_data_preprocessed.stop_times["trip_id"].nunique() + ) len_expected += len(connectors_data.connectors_transfer) len_expected += len(connectors_data.connectors_access) len_expected += len(connectors_data.connectors_egress) assert len(edges) == len_expected # all variables are included - assert list(edges.columns) == [ - 'onode', 'dnode', 'ivt', 'walk', 'wait', 'transfer'] + assert list(edges.columns) == ["onode", "dnode", "ivt", "walk", "wait", "transfer"] def test_calculate_gc(mock_config): - edges = pd.DataFrame({ - 'ivt': [100, 200], - 'walk': [30, 10], - 'wait': [10, 5], - 'transfer': [0, 1] - }) + edges = pd.DataFrame({"ivt": [100, 200], "walk": [30, 10], "wait": [10, 5], "transfer": [0, 1]}) graph.add_gc(edges, mock_config) - assert list(edges['gc']) == [190, 835] + assert list(edges["gc"]) == [190, 835] def test_get_shortest_distance_single(small_graph): @@ -86,13 +73,8 @@ def test_get_shortest_distance_single(small_graph): def test_get_distance_matrix(small_graph_birectional): - distmat = graph.get_shortest_distances( - small_graph_birectional, 
[0, 1, 2], [1, 2]) - expected = np.array([ - [10, 20], - [0, 19], - [19, 0] - ]) + distmat = graph.get_shortest_distances(small_graph_birectional, [0, 1, 2], [1, 2]) + expected = np.array([[10, 20], [0, 19], [19, 0]]) assert list(distmat.index) == [0, 1, 2] assert list(distmat.columns) == [1, 2] @@ -103,8 +85,9 @@ def test_correct_labels(config, gtfs_data_preprocessed, connectors_data, tmpdir) origins = pd.read_csv(config.path_origins, index_col=0) destinations = pd.read_csv(config.path_destinations, index_col=0) config.path_outputs = tmpdir - distmat = graph.main(config=config, gtfs_data=gtfs_data_preprocessed, - connectors_data=connectors_data) + distmat = graph.main( + config=config, gtfs_data=gtfs_data_preprocessed, connectors_data=connectors_data + ) assert list(distmat.index) == list(origins.index) assert list(distmat.columns) == list(destinations.index) diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py index acba58e..c3b7a9d 100644 --- a/tests/test_preprocessing.py +++ b/tests/test_preprocessing.py @@ -1,24 +1,21 @@ import os -from pathlib import Path -import pytest from gtfs_skims import preprocessing def test_filter_date(gtfs_data): - a = 1 assert 14 in gtfs_data.calendar.service_id.values preprocessing.filter_day(gtfs_data, 20180507) assert list(gtfs_data.calendar.service_id) == [14] - assert set(gtfs_data.trips['service_id']) == set([14]) + assert set(gtfs_data.trips["service_id"]) == set([14]) def test_filter_time(gtfs_data): - start_time = 9*3600 - end_time = 10*3600 + start_time = 9 * 3600 + end_time = 10 * 3600 preprocessing.filter_time(gtfs_data, start_time, end_time) - assert gtfs_data.stop_times['arrival_s'].min() >= start_time - assert gtfs_data.stop_times['departure_s'].max() <= end_time + assert gtfs_data.stop_times["arrival_s"].min() >= start_time + assert gtfs_data.stop_times["departure_s"].max() <= end_time def test_projected_coords_within_bounds(gtfs_data): @@ -27,10 +24,10 @@ def 
test_projected_coords_within_bounds(gtfs_data): xmin, ymin = 423104, 69171 xmax, ymax = 471370, 101154 - assert gtfs_data.stops['x'].min() > xmin - assert gtfs_data.stops['x'].max() < xmax - assert gtfs_data.stops['y'].min() > ymin - assert gtfs_data.stops['y'].max() < ymax + assert gtfs_data.stops["x"].min() > xmin + assert gtfs_data.stops["x"].max() < xmax + assert gtfs_data.stops["y"].min() > ymin + assert gtfs_data.stops["y"].max() < ymax def test_within_bounding_box(gtfs_data): @@ -39,21 +36,18 @@ def test_within_bounding_box(gtfs_data): # filter for Cowes xmin, ymin = 447477, 92592 xmax, ymax = 451870, 96909 - assert gtfs_data.stops['x'].min() < xmin - preprocessing.filter_bounding_box( - gtfs_data, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax) + assert gtfs_data.stops["x"].min() < xmin + preprocessing.filter_bounding_box(gtfs_data, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax) - assert gtfs_data.stops['x'].min() > xmin - assert gtfs_data.stops['x'].max() < xmax - assert gtfs_data.stops['y'].min() > ymin - assert gtfs_data.stops['y'].max() < ymax + assert gtfs_data.stops["x"].min() > xmin + assert gtfs_data.stops["x"].max() < xmax + assert gtfs_data.stops["y"].min() > ymin + assert gtfs_data.stops["y"].max() < ymax def test_run_preprocessing_demo(config, tmpdir): - path_outputs = os.path.join(tmpdir, 'outputs') + path_outputs = os.path.join(tmpdir, "outputs") config.path_outputs = path_outputs preprocessing.main(config) - for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: - assert os.path.exists( - os.path.join(path_outputs, f'{x}.parquet.gzip') - ) + for x in ["calendar", "routes", "stops", "stop_times", "trips"]: + assert os.path.exists(os.path.join(path_outputs, f"{x}.parquet.gzip")) diff --git a/tests/test_utils.py b/tests/test_utils.py index 5585b62..eba5713 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -6,32 +6,30 @@ def test_parse_timestamp(): - assert utils.ts_to_sec('00:00:00') == 0 - assert utils.ts_to_sec('10:01:01') == 
36061 + assert utils.ts_to_sec("00:00:00") == 0 + assert utils.ts_to_sec("10:01:01") == 36061 def test_get_logger(tmpdir): - logger = utils.get_logger(os.path.join(tmpdir, 'logs', 'log.log')) - logger.info('test') + logger = utils.get_logger(os.path.join(tmpdir, "logs", "log.log")) + logger.info("test") def test_weekday(): - assert utils.get_weekday(20231201) == 'friday' + assert utils.get_weekday(20231201) == "friday" def test_load_config(config): - 'path_gtfs' in config.__dict__ + "path_gtfs" in config.__dict__ def test_load_gtfs(gtfs_data): - for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: + for x in ["calendar", "routes", "stops", "stop_times", "trips"]: assert isinstance(getattr(gtfs_data, x), pd.DataFrame) def test_cache_gtfs(gtfs_data, tmpdir): gtfs_data.save(tmpdir) gtfs_cached = utils.GTFSData.from_parquet(tmpdir) - for x in ['calendar', 'routes', 'stops', 'stop_times', 'trips']: - pd.testing.assert_frame_equal( - getattr(gtfs_data, x), getattr(gtfs_cached, x) - ) + for x in ["calendar", "routes", "stops", "stop_times", "trips"]: + pd.testing.assert_frame_equal(getattr(gtfs_data, x), getattr(gtfs_cached, x)) From ce08a8e89d696a7a08aad4fe43fc02cd83b3f1c7 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 15 Dec 2023 09:16:38 +0000 Subject: [PATCH 26/30] linting --- gtfs_skims/cli.py | 3 +-- gtfs_skims/connectors.py | 10 +++++++--- gtfs_skims/utils.py | 3 ++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gtfs_skims/cli.py b/gtfs_skims/cli.py index 5c93373..e170fb0 100755 --- a/gtfs_skims/cli.py +++ b/gtfs_skims/cli.py @@ -13,8 +13,7 @@ @click.version_option(package_name="gtfs_skims") @click.group def cli(args=None): - """Console script for gtfs_skims.""" - click.echo("Console script for Argo (gtfs_skims).") + """Console script for Argo (gtfs_skims).""" return 0 diff --git a/gtfs_skims/connectors.py b/gtfs_skims/connectors.py index b611756..77d574c 100644 --- a/gtfs_skims/connectors.py +++ 
b/gtfs_skims/connectors.py @@ -18,7 +18,7 @@ def query_pairs(coords: np.ndarray, radius: float) -> np.array: has always greater z coordinate than the origin point. Args: - coords (np.array): Point coordinates (x, y, z) + coords (np.ndarray): Point coordinates (x, y, z) radius (float): Maximum distance between points Returns: @@ -33,9 +33,13 @@ def query_pairs(coords: np.ndarray, radius: float) -> np.array: class TransferConnectors: - """Manages transfer connectors.""" - def __init__(self, coords: np.ndarray, max_transfer_distance: float) -> None: + """Manages transfer connectors. + + Args: + coords (np.ndarray): Point coordinates (x, y, z) + max_transfer_distance (float): Maximum distance between points + """ self.coords = coords radius = max_transfer_distance * (2**0.5) self.ods = query_pairs(coords, radius=radius) diff --git a/gtfs_skims/utils.py b/gtfs_skims/utils.py index 4c9dda5..b673141 100644 --- a/gtfs_skims/utils.py +++ b/gtfs_skims/utils.py @@ -2,6 +2,7 @@ import logging import os +from abc import ABC from dataclasses import dataclass from datetime import datetime from pathlib import Path @@ -147,7 +148,7 @@ def __repr__(self) -> str: @dataclass -class Data: +class Data(ABC): @classmethod def from_gtfs(cls, path_gtfs: str) -> Data: """Load GTFS tables from a standard zipped GTFS file. 
From c1486f0b7c120c58ac95afef026b7676f917701e Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 15 Dec 2023 09:30:13 +0000 Subject: [PATCH 27/30] fix tests --- tests/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 9e61d4a..4f60664 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -17,7 +17,7 @@ def test_command_line_interface(): help_result = runner.invoke(cli.cli, ["--help"]) assert help_result.exit_code == 0 assert ( - "Console script for gtfs_skims.\n\nOptions:\n " + "Console script for Argo (gtfs_skims).\n\nOptions:\n " "--version Show the version and exit.\n " "--help Show this message and exit.\n" in help_result.output ) From 7f048b4004d517634b5f7fc4639545e76d091a42 Mon Sep 17 00:00:00 2001 From: Bryn Pickering <17178478+brynpickering@users.noreply.github.com> Date: Fri, 15 Dec 2023 11:39:50 +0000 Subject: [PATCH 28/30] Fix for github.com/arup-group/genet/issues/213 --- gtfs_skims/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gtfs_skims/__init__.py b/gtfs_skims/__init__.py index 24d1f27..36ba3aa 100755 --- a/gtfs_skims/__init__.py +++ b/gtfs_skims/__init__.py @@ -1,5 +1,8 @@ """Top-level module for gtfs_skims.""" +import pyproj __author__ = """Theodore-Chatziioannou""" __email__ = "Theodore.Chatziioannou@arup.com" __version__ = "0.1.0" + +pyproj.network.set_network_enabled(False) From 7d97c33ef36dc3cbd3c205123f1562c18384fec9 Mon Sep 17 00:00:00 2001 From: "Theodore.Chatziioannou" Date: Fri, 15 Dec 2023 11:44:56 +0000 Subject: [PATCH 29/30] remove tests init file --- tests/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/__init__.py diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 From e90e2a3904de2335eedd42e6928161f5022d236b Mon Sep 17 00:00:00 2001 From: Bryn Pickering <17178478+brynpickering@users.noreply.github.com> Date: Fri, 15 Dec 2023 16:53:42 
+0000 Subject: [PATCH 30/30] Clean up text duplication --- README.md | 9 ++++----- docs/index.md | 8 +------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 7753847..87cfe61 100755 --- a/README.md +++ b/README.md @@ -7,13 +7,12 @@ [![Daily CI Build](https://github.com/arup-group/gtfs_skims/actions/workflows/daily-scheduled-ci.yml/badge.svg)](https://github.com/arup-group/gtfs_skims/actions/workflows/daily-scheduled-ci.yml) [![Documentation](https://github.com/arup-group/gtfs_skims/actions/workflows/pages/pages-build-deployment/badge.svg?branch=gh-pages)](https://arup-group.github.io/gtfs_skims) - - - -Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. -By applying appropriate simplifications on the GTFS dataset, the library is able to calculate such matrices at scale. +Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. +By applying appropriate simplifications on the GTFS dataset, the library is able to calculate such matrices at scale. For example, it was possible to calculate an MSOA-to-MSOA matrix for England and Wales in ~1 hour (with a relatevile large machine). + + ## Documentation For more detailed instructions, see our [documentation](https://arup-group.github.io/gtfs_skims/latest). diff --git a/docs/index.md b/docs/index.md index 3909da0..8d4a99a 100755 --- a/docs/index.md +++ b/docs/index.md @@ -1,7 +1 @@ ---8<-- "README.md:docs" - -# Argo - -Argo is a library aimed at the fast calculation of generalised time matrices from GTFS files. -By applying appropriate simplifications on the GTFS dataset, the library is able to calculate such matrices at scale. -For example, it was possible to calculate an MSOA-to-MSOA matrix for England and Wales in ~1 hour (with a relatevile large machine). \ No newline at end of file +--8<-- "README.md:docs" \ No newline at end of file