diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0d0539c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/Manifest.toml +/.vscode +/testing_files/ \ No newline at end of file diff --git a/Manifest.toml b/Manifest.toml deleted file mode 100644 index ba2a110..0000000 --- a/Manifest.toml +++ /dev/null @@ -1,564 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -julia_version = "1.9.1" -manifest_format = "2.0" -project_hash = "13c005244b6149473cd018459c12b37a69eeb16a" - -[[deps.ANSIColoredPrinters]] -git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" -uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9" -version = "0.0.1" - -[[deps.AbstractTrees]] -git-tree-sha1 = "2d9c9a55f9c93e8887ad391fbae72f8ef55e1177" -uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" -version = "0.4.5" - -[[deps.ArgTools]] -uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" -version = "1.1.1" - -[[deps.Artifacts]] -uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" - -[[deps.Base64]] -uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" - -[[deps.BitFlags]] -git-tree-sha1 = "2dc09997850d68179b69dafb58ae806167a32b1b" -uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35" -version = "0.1.8" - -[[deps.CEnum]] -git-tree-sha1 = "389ad5c84de1ae7cf0e28e381131c98ea87d54fc" -uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" -version = "0.5.0" - -[[deps.CSV]] -deps = ["CodecZlib", "Dates", "FilePathsBase", "InlineStrings", "Mmap", "Parsers", "PooledArrays", "PrecompileTools", "SentinelArrays", "Tables", "Unicode", "WeakRefStrings", "WorkerUtilities"] -git-tree-sha1 = "a44910ceb69b0d44fe262dd451ab11ead3ed0be8" -uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" -version = "0.10.13" - -[[deps.CodecZlib]] -deps = ["TranscodingStreams", "Zlib_jll"] -git-tree-sha1 = "59939d8a997469ee05c4b4944560a820f9ba0d73" -uuid = "944b1d66-785c-5afd-91f1-9de20f533193" -version = "0.7.4" - -[[deps.Compat]] -deps = ["TOML", "UUIDs"] -git-tree-sha1 = "c955881e3c981181362ae4088b35995446298b80" -uuid = 
"34da2185-b29b-5c13-b0c7-acf172513d20" -version = "4.14.0" -weakdeps = ["Dates", "LinearAlgebra"] - - [deps.Compat.extensions] - CompatLinearAlgebraExt = "LinearAlgebra" - -[[deps.CompilerSupportLibraries_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "1.0.2+0" - -[[deps.ConcurrentUtilities]] -deps = ["Serialization", "Sockets"] -git-tree-sha1 = "6cbbd4d241d7e6579ab354737f4dd95ca43946e1" -uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb" -version = "2.4.1" - -[[deps.ConstructionBase]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "260fd2400ed2dab602a7c15cf10c1933c59930a2" -uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9" -version = "1.5.5" - - [deps.ConstructionBase.extensions] - ConstructionBaseIntervalSetsExt = "IntervalSets" - ConstructionBaseStaticArraysExt = "StaticArrays" - - [deps.ConstructionBase.weakdeps] - IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" - StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" - -[[deps.Crayons]] -git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" -uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" -version = "4.1.1" - -[[deps.DataAPI]] -git-tree-sha1 = "abe83f3a2f1b857aac70ef8b269080af17764bbe" -uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" -version = "1.16.0" - -[[deps.DataFrames]] -deps = ["Compat", "DataAPI", "DataStructures", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SentinelArrays", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] -git-tree-sha1 = "04c738083f29f86e62c8afc341f0967d8717bdb8" -uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -version = "1.6.1" - -[[deps.DataStructures]] -deps = ["Compat", "InteractiveUtils", "OrderedCollections"] -git-tree-sha1 = "0f4b5d62a88d8f59003e43c25a8a90de9eb76317" -uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -version = "0.18.18" - 
-[[deps.DataValueInterfaces]] -git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" -uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" -version = "1.0.0" - -[[deps.Dates]] -deps = ["Printf"] -uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" - -[[deps.DocStringExtensions]] -deps = ["LibGit2"] -git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" -uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.9.3" - -[[deps.Documenter]] -deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "CodecZlib", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "TOML", "Test", "Unicode"] -git-tree-sha1 = "4a40af50e8b24333b9ec6892546d9ca5724228eb" -uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "1.3.0" - -[[deps.Downloads]] -deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] -uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" -version = "1.6.0" - -[[deps.ExceptionUnwrapping]] -deps = ["Test"] -git-tree-sha1 = "dcb08a0d93ec0b1cdc4af184b26b591e9695423a" -uuid = "460bff9d-24e4-43bc-9d9f-a8973cb893f4" -version = "0.1.10" - -[[deps.Expat_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "4558ab818dcceaab612d1bb8c19cee87eda2b83c" -uuid = "2e619515-83b5-522b-bb60-26c02a35a201" -version = "2.5.0+0" - -[[deps.EzXML]] -deps = ["Printf", "XML2_jll"] -git-tree-sha1 = "380053d61bb9064d6aa4a9777413b40429c79901" -uuid = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615" -version = "1.2.0" - -[[deps.FilePathsBase]] -deps = ["Compat", "Dates", "Mmap", "Printf", "Test", "UUIDs"] -git-tree-sha1 = "9f00e42f8d99fdde64d40c8ea5d14269a2e2c1aa" -uuid = "48062228-2e41-5def-b9a4-89aafe57970f" -version = "0.9.21" - -[[deps.FileWatching]] -uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" - -[[deps.Future]] -deps = ["Random"] -uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" - -[[deps.Git]] -deps = ["Git_jll"] -git-tree-sha1 = 
"04eff47b1354d702c3a85e8ab23d539bb7d5957e" -uuid = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2" -version = "1.3.1" - -[[deps.Git_jll]] -deps = ["Artifacts", "Expat_jll", "JLLWrappers", "LibCURL_jll", "Libdl", "Libiconv_jll", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"] -git-tree-sha1 = "d18fb8a1f3609361ebda9bf029b60fd0f120c809" -uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb" -version = "2.44.0+2" - -[[deps.HTTP]] -deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] -git-tree-sha1 = "995f762e0182ebc50548c434c171a5bb6635f8e4" -uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" -version = "1.10.4" - -[[deps.IOCapture]] -deps = ["Logging", "Random"] -git-tree-sha1 = "8b72179abc660bfab5e28472e019392b97d0985c" -uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" -version = "0.2.4" - -[[deps.InlineStrings]] -deps = ["Parsers"] -git-tree-sha1 = "9cc2baf75c6d09f9da536ddf58eb2f29dedaf461" -uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" -version = "1.4.0" - -[[deps.InteractiveUtils]] -deps = ["Markdown"] -uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" - -[[deps.InvertedIndices]] -git-tree-sha1 = "0dc7b50b8d436461be01300fd8cd45aa0274b038" -uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" -version = "1.3.0" - -[[deps.IteratorInterfaceExtensions]] -git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" -uuid = "82899510-4779-5014-852e-03e436cf321d" -version = "1.0.0" - -[[deps.JLLWrappers]] -deps = ["Artifacts", "Preferences"] -git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca" -uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" -version = "1.5.0" - -[[deps.JSON]] -deps = ["Dates", "Mmap", "Parsers", "Unicode"] -git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" -uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.4" - -[[deps.LaTeXStrings]] -git-tree-sha1 = "50901ebc375ed41dbf8058da26f9de442febbbec" -uuid = 
"b964fa9f-0449-5b57-a5c2-d3ea65f4040f" -version = "1.3.1" - -[[deps.LazilyInitializedFields]] -git-tree-sha1 = "8f7f3cabab0fd1800699663533b6d5cb3fc0e612" -uuid = "0e77f7df-68c5-4e49-93ce-4cd80f5598bf" -version = "1.2.2" - -[[deps.LibCURL]] -deps = ["LibCURL_jll", "MozillaCACerts_jll"] -uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" -version = "0.6.3" - -[[deps.LibCURL_jll]] -deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] -uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -version = "7.84.0+0" - -[[deps.LibGit2]] -deps = ["Base64", "NetworkOptions", "Printf", "SHA"] -uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" - -[[deps.LibSSH2_jll]] -deps = ["Artifacts", "Libdl", "MbedTLS_jll"] -uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" -version = "1.10.2+0" - -[[deps.Libdl]] -uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" - -[[deps.Libiconv_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "f9557a255370125b405568f9767d6d195822a175" -uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" -version = "1.17.0+0" - -[[deps.LinearAlgebra]] -deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] -uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" - -[[deps.Logging]] -uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" - -[[deps.LoggingExtras]] -deps = ["Dates", "Logging"] -git-tree-sha1 = "c1dd6d7978c12545b4179fb6153b9250c96b0075" -uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" -version = "1.0.3" - -[[deps.Markdown]] -deps = ["Base64"] -uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" - -[[deps.MarkdownAST]] -deps = ["AbstractTrees", "Markdown"] -git-tree-sha1 = "465a70f0fc7d443a00dcdc3267a497397b8a3899" -uuid = "d0879d2d-cac2-40c8-9cee-1863dc0c7391" -version = "0.1.2" - -[[deps.MbedTLS]] -deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"] -git-tree-sha1 = "c067a280ddc25f196b5e7df3877c6b226d390aaf" -uuid = "739be429-bea8-5141-9913-cc70e7f3736d" -version = "1.1.9" - -[[deps.MbedTLS_jll]] -deps = ["Artifacts", 
"Libdl"] -uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -version = "2.28.2+0" - -[[deps.Missings]] -deps = ["DataAPI"] -git-tree-sha1 = "f66bdc5de519e8f8ae43bdc598782d35a25b1272" -uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" -version = "1.1.0" - -[[deps.Mmap]] -uuid = "a63ad114-7e13-5084-954f-fe012c677804" - -[[deps.MozillaCACerts_jll]] -uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -version = "2022.10.11" - -[[deps.NetworkOptions]] -uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" -version = "1.2.0" - -[[deps.OpenBLAS_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] -uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.21+4" - -[[deps.OpenSSL]] -deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"] -git-tree-sha1 = "af81a32750ebc831ee28bdaaba6e1067decef51e" -uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c" -version = "1.4.2" - -[[deps.OpenSSL_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "60e3045590bd104a16fefb12836c00c0ef8c7f8c" -uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" -version = "3.0.13+0" - -[[deps.OrderedCollections]] -git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5" -uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.6.3" - -[[deps.PCRE2_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" -version = "10.42.0+0" - -[[deps.Parsers]] -deps = ["Dates", "PrecompileTools", "UUIDs"] -git-tree-sha1 = "8489905bcdbcfac64d1daa51ca07c0d8f0283821" -uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "2.8.1" - -[[deps.Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] -uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -version = "1.9.0" - -[[deps.PooledArrays]] -deps = ["DataAPI", "Future"] -git-tree-sha1 = "36d8b4b899628fb92c2749eb488d884a926614d3" -uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" 
-version = "1.4.3" - -[[deps.PrecompileTools]] -deps = ["Preferences"] -git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f" -uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" -version = "1.2.1" - -[[deps.Preferences]] -deps = ["TOML"] -git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6" -uuid = "21216c6a-2e73-6563-6e65-726566657250" -version = "1.4.3" - -[[deps.PrettyTables]] -deps = ["Crayons", "LaTeXStrings", "Markdown", "PrecompileTools", "Printf", "Reexport", "StringManipulation", "Tables"] -git-tree-sha1 = "88b895d13d53b5577fd53379d913b9ab9ac82660" -uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" -version = "2.3.1" - -[[deps.Printf]] -deps = ["Unicode"] -uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" - -[[deps.REPL]] -deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] -uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" - -[[deps.Random]] -deps = ["SHA", "Serialization"] -uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" - -[[deps.ReadStatTables]] -deps = ["CEnum", "DataAPI", "Dates", "InlineStrings", "PooledArrays", "PrecompileTools", "PrettyTables", "ReadStat_jll", "SentinelArrays", "StructArrays", "Tables"] -git-tree-sha1 = "7a2c572e97f5588f5774d2b9024cad18401ec977" -uuid = "52522f7a-9570-4e34-8ac6-c005c74d4b84" -version = "0.2.6" - -[[deps.ReadStat_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Zlib_jll"] -git-tree-sha1 = "28e990e90ca643e99f3ec0188089c1816e8b46f4" -uuid = "a4dc8951-f1cc-5499-9034-9ec1c3e64557" -version = "1.1.9+0" - -[[deps.Reexport]] -git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" -uuid = "189a3867-3050-52da-a836-e630ba90ab69" -version = "1.2.2" - -[[deps.RegistryInstances]] -deps = ["LazilyInitializedFields", "Pkg", "TOML", "Tar"] -git-tree-sha1 = "ffd19052caf598b8653b99404058fce14828be51" -uuid = "2792f1a3-b283-48e8-9a74-f99dce5104f3" -version = "0.1.0" - -[[deps.SHA]] -uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" -version = "0.7.0" - -[[deps.SentinelArrays]] -deps = ["Dates", "Random"] 
-git-tree-sha1 = "0e7508ff27ba32f26cd459474ca2ede1bc10991f" -uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" -version = "1.4.1" - -[[deps.Serialization]] -uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" - -[[deps.SimpleBufferStream]] -git-tree-sha1 = "874e8867b33a00e784c8a7e4b60afe9e037b74e1" -uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7" -version = "1.1.0" - -[[deps.Sockets]] -uuid = "6462fe0b-24de-5631-8697-dd941f90decc" - -[[deps.SortingAlgorithms]] -deps = ["DataStructures"] -git-tree-sha1 = "66e0a8e672a0bdfca2c3f5937efb8538b9ddc085" -uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" -version = "1.2.1" - -[[deps.SparseArrays]] -deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] -uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" - -[[deps.Statistics]] -deps = ["LinearAlgebra", "SparseArrays"] -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" -version = "1.9.0" - -[[deps.StringManipulation]] -deps = ["PrecompileTools"] -git-tree-sha1 = "a04cabe79c5f01f4d723cc6704070ada0b9d46d5" -uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e" -version = "0.3.4" - -[[deps.StructArrays]] -deps = ["ConstructionBase", "DataAPI", "Tables"] -git-tree-sha1 = "f4dc295e983502292c4c3f951dbb4e985e35b3be" -uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" -version = "0.6.18" - - [deps.StructArrays.extensions] - StructArraysAdaptExt = "Adapt" - StructArraysGPUArraysCoreExt = "GPUArraysCore" - StructArraysSparseArraysExt = "SparseArrays" - StructArraysStaticArraysExt = "StaticArrays" - - [deps.StructArrays.weakdeps] - Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" - GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" - SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" - StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" - -[[deps.SuiteSparse_jll]] -deps = ["Artifacts", "Libdl", "Pkg", "libblastrampoline_jll"] -uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" -version = "5.10.1+6" - -[[deps.TOML]] -deps = ["Dates"] -uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" -version = "1.0.3" 
- -[[deps.TableTraits]] -deps = ["IteratorInterfaceExtensions"] -git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" -uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" -version = "1.0.1" - -[[deps.Tables]] -deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "OrderedCollections", "TableTraits"] -git-tree-sha1 = "cb76cf677714c095e535e3501ac7954732aeea2d" -uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" -version = "1.11.1" - -[[deps.Tar]] -deps = ["ArgTools", "SHA"] -uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" -version = "1.10.0" - -[[deps.Test]] -deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] -uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[[deps.TranscodingStreams]] -git-tree-sha1 = "14389d51751169994b2e1317d5c72f7dc4f21045" -uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" -version = "0.10.6" -weakdeps = ["Random", "Test"] - - [deps.TranscodingStreams.extensions] - TestExt = ["Test", "Random"] - -[[deps.URIs]] -git-tree-sha1 = "67db6cc7b3821e19ebe75791a9dd19c9b1188f2b" -uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" -version = "1.5.1" - -[[deps.UUIDs]] -deps = ["Random", "SHA"] -uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" - -[[deps.Unicode]] -uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" - -[[deps.WeakRefStrings]] -deps = ["DataAPI", "InlineStrings", "Parsers"] -git-tree-sha1 = "b1be2855ed9ed8eac54e5caff2afcdb442d52c23" -uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5" -version = "1.4.2" - -[[deps.WorkerUtilities]] -git-tree-sha1 = "cd1659ba0d57b71a464a29e64dbc67cfe83d54e7" -uuid = "76eceee3-57b5-4d4a-8e66-0e911cebbf60" -version = "1.6.1" - -[[deps.XLSX]] -deps = ["Artifacts", "Dates", "EzXML", "Printf", "Tables", "ZipFile"] -git-tree-sha1 = "319b05e790046f18f12b8eae542546518ef1a88f" -uuid = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0" -version = "0.10.1" - -[[deps.XML2_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Zlib_jll"] -git-tree-sha1 = "07e470dabc5a6a4254ffebc29a1b3fc01464e105" -uuid 
= "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" -version = "2.12.5+0" - -[[deps.ZipFile]] -deps = ["Libdl", "Printf", "Zlib_jll"] -git-tree-sha1 = "f492b7fe1698e623024e873244f10d89c95c340a" -uuid = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" -version = "0.10.1" - -[[deps.Zlib_jll]] -deps = ["Libdl"] -uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.13+0" - -[[deps.libblastrampoline_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" -version = "5.8.0+0" - -[[deps.nghttp2_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -version = "1.48.0+0" - -[[deps.p7zip_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" -version = "17.4.0+0" diff --git a/Project.toml b/Project.toml index a53bfe2..aa8e1be 100644 --- a/Project.toml +++ b/Project.toml @@ -15,6 +15,14 @@ XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0" [compat] julia = "1.9" +CSV = "0.10" +DataFrames = "1.5" +Dates = "1.9" +Documenter = "0.27, 1" +HTTP = "1.10" +ReadStatTables = "0.3" +Reexport = "0.2, 1" +XLSX = "0.10" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/README.md b/README.md index 2c20622..52840e5 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,13 @@ ## What is TidierFiles.jl? -TidierFiles.jl leverages the `CSV.jl`, `XLSX.jl`, and `ReadStatTables.jl` packages to reimplement the R `haven` and `readr` packages. -Currently supported file types +TidierFiles.jl is a 100% Julia implementation of the readr, haven, readxl, and writexl R packages. + +Powered by the CSV.jl, XLSX.jl and ReadStatTables.jl packages, TidierFiles.jl aims to bring a consistent interface to the reading and writing of tabular data, including a consistent syntax to read files locally versus from the web and consistent keyword arguments across data formats. 
+ + +Currently supported file types: - `read_csv` and `write_csv` - `read_tsv` and `write_tsv` - `read_xlsx` and `write_xlsx` @@ -20,12 +24,25 @@ Currently supported file types - `read_dta` and `write_dta` (.dta) # Examples -For CSVs (also TSV, white space tables, other delimters) -``` -df = DataFrame(integers=[1, 2, 3, 4], strings=["This", "Package makes", "File reading/writing", "even smoother"], floats=[10.2, 20.3, 30.4, 40.5], dates=[Date(2018,2,20), Date(2018,2,21), Date(2018,2,22), Date(2018,2,23)], times=[Dates.Time(19,10), Dates.Time(19,20), Dates.Time(19,30), Dates.Time(19,40)]) -write_csv(df, "/Users/danielrizk/Downloads/testing.csv" , col_names= true) -read_csv("/Users/danielrizk/Downloads/testing.csv", missingstring=["40.5", "10.2"]) + +Here is an example of how to write and read a CSV file. + +```julia +using TidierFiles + +df = DataFrame( + integers = [1, 2, 3, 4], + strings = ["This", "Package makes", "File reading/writing", "even smoother"], + floats = [10.2, 20.3, 30.4, 40.5], + dates = [Date(2018,2,20), Date(2018,2,21), Date(2018,2,22), Date(2018,2,23)], + times = [Dates.Time(19,10), Dates.Time(19,20), Dates.Time(19,30), Dates.Time(19,40)] + ) + +write_csv(df, "testing.csv" , col_names = true) + +read_csv("testing.csv", missingstring=["40.5", "10.2"]) ``` + ``` 4×5 DataFrame Row │ integers strings floats dates times @@ -37,37 +54,28 @@ read_csv("/Users/danielrizk/Downloads/testing.csv", missingstring=["40.5", "10.2 4 │ 4 even smoother missing 2018-02-23 19:40:00:00 ``` -For Excel files -``` -df2 = DataFrames.DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]) -write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path, overwrite = true) -read_excel(path, sheet = "REPORT_A", skip = 1, n_max = 4, missingstring = [20.3]) -``` -``` -3×5 DataFrame - Row │ integers strings floats dates times - │ Int64 String Any Date Time -─────┼─────────────────────────────────────────────────────────────── - 1 │ 2 Package makes missing 2018-02-21 19:20:00 - 2 │ 3 File 
reading/writing 30.4 2018-02-22 19:30:00 - 3 │ 4 even smoother 40.5 2018-02-23 19:40:00 +The file reading functions include the following keyword arguments: +- `path` +- `missingstring` +- `col_names` +- `col_select` +- `num_threads` +- `skip` +- `n_max` +- `delim` (where applicable) +The path can be a file available either locally or on the web. + +```julia +read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testing_files/csvtest.csv", skip = 2, n_max = 3, col_select = ["ID", "Score"], missingstring = ["4"]) ``` -FOR FWF files -``` -path = "fwftest.txt" -read_fwf(path, fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]), skip_to=3, n_max=6) -## `fwf_empty` will parse and guess path widths based on padding and user determined number of lines -``` -``` -6×5 DataFrame - Row │ Name Age ID Position Salary - │ String String String String String -─────┼──────────────────────────────────────────────────────────── - 1 │ Bob Brown 31 12345 Product Manager $110,000 - 2 │ Charlie Day 28 34 Sales Associate $70,000 - 3 │ Diane Poe 35 23456 Data Scientist $130,000 - 4 │ Eve Stone 4 123456 Chief Financial Off $200,000 - 5 │ Frank Moore 33 1234 Graphic Designer $80,000 - 6 │ Grace Lee 27 123456 Software Developer $115,000 + ``` +3×2 DataFrame + Row │ ID Score + │ Int64? 
Int64 +─────┼──────────────── + 1 │ 3 77 + 2 │ missing 85 + 3 │ 5 95 +``` \ No newline at end of file diff --git a/docs/Project.toml b/docs/Project.toml index b7e7618..87526c2 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -8,5 +8,5 @@ Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" ReadStatTables = "52522f7a-9570-4e34-8ac6-c005c74d4b84" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" -XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0 +XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0" TidierFiles = "8ae5e7a9-bdd3-4c93-9cc3-9df4d5d947db" \ No newline at end of file diff --git a/docs/src/index.md b/docs/src/index.md index d9f2546..cbeb60a 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,13 +1,13 @@ +# TidierFiles.jl + ## What is TidierFiles.jl? -TidierFiles.jl is a 100% Julia implementation of the readr and haven R packages. -Powered by the CSV.jl, XLSX.jl and ReadStatTables.jl packages, TidierFiles.jl -seeks to harmonize file reading/writing by unifying the arguments across multiple -file types. +TidierFiles.jl is a 100% Julia implementation of the readr, haven, readxl, and writexl R packages. + +Powered by the CSV.jl, XLSX.jl and ReadStatTables.jl packages, TidierFiles.jl aims to bring a consistent interface to the reading and writing of tabular data, including a consistent syntax to read files locally versus from the web and consistent keyword arguments across data formats. + -TidierFiles.jl currently supports -```@raw html -!!! example +Currently supported file types: - `read_csv` and `write_csv` - `read_tsv` and `write_tsv` - `read_xlsx` and `write_xlsx` @@ -17,9 +17,39 @@ TidierFiles.jl currently supports - `read_sav` and `write_sav` (.sav and .por) - `read_sas` and `write_sas` (.sas7bdat and .xpt) - `read_dta` and `write_dta` (.dta) + +# Examples + +Here is an example of how to write and read a CSV file. 
+ +```julia +using TidierFiles + +df = DataFrame( + integers = [1, 2, 3, 4], + strings = ["This", "Package makes", "File reading/writing", "even smoother"], + floats = [10.2, 20.3, 30.4, 40.5], + dates = [Date(2018,2,20), Date(2018,2,21), Date(2018,2,22), Date(2018,2,23)], + times = [Dates.Time(19,10), Dates.Time(19,20), Dates.Time(19,30), Dates.Time(19,40)] + ) + +write_csv(df, "testing.csv" , col_names = true) + +read_csv("testing.csv", missingstring=["40.5", "10.2"]) ``` -Read functions include the following arguments and support HTTP reading: +``` +4×5 DataFrame + Row │ integers strings floats dates times + │ Int64 String31 Float64? Date Time +─────┼───────────────────────────────────────────────────────────────── + 1 │ 1 This missing 2018-02-20 19:10:00 + 2 │ 2 Package makes 20.3 2018-02-21 19:20:00 + 3 │ 3 File reading/writing 30.4 2018-02-22 19:30:00 + 4 │ 4 even smoother missing 2018-02-23 19:40:00 +``` + +The file reading functions include the following keyword arguments: - `path` - `missingstring` - `col_names` @@ -27,14 +57,14 @@ Read functions include the following arguments and support HTTP reading: - `num_threads` - `skip` - `n_max` -- `delim` (where applies) +- `delim` (where applicable) -```julia -using TidierFiles +The path can be a file available either locally or on the web. 
+```julia read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testing_files/csvtest.csv", skip = 2, n_max = 3, col_select = ["ID", "Score"], missingstring = ["4"]) - ``` + ``` 3×2 DataFrame Row │ ID Score diff --git a/src/TidierFiles.jl b/src/TidierFiles.jl index a45039f..ead47f9 100644 --- a/src/TidierFiles.jl +++ b/src/TidierFiles.jl @@ -20,7 +20,6 @@ include("fwf.jl") include("xlfiles.jl") include("statsfiles.jl") - """ $docstring_read_csv """ @@ -85,34 +84,10 @@ function read_csv(file; return df end - -""" -$docstring_write_csv """ -function write_csv( - x::DataFrame, - file::String; - missingstring::String = "NA", - append::Bool = false, - col_names::Bool = true, - eol::String = "\n", - num_threads::Int = Threads.nthreads()) - - # Configure threading - CSV.write( - file, - x, - append = append, - writeheader = col_names && !append, - missingstring = missingstring, - newline = eol, - threaded = num_threads > 1 ) -end - -""" -$docstring_read_tsv +$docstring_read_delim """ -function read_tsv(file; +function read_delim(file; delim='\t', col_names=true, skip=0, @@ -123,7 +98,7 @@ function read_tsv(file; escape_double=true, ntasks::Int = Threads.nthreads(), # Default ntasks value num_threads::Union{Int, Nothing}=nothing) # Optional num_threads - + # Use num_threads if provided, otherwise stick with ntasks effective_ntasks = isnothing(num_threads) ? ntasks : num_threads @@ -138,9 +113,9 @@ function read_tsv(file; delim = delim, header = col_names === true ? 1 : 0, skipto = skipto + 1, + select = col_select, footerskip = 0, limit = limit, - select = col_select, comment = comment, missingstring = missingstring, escapechar = escape_double ? 
'"' : '\\', @@ -148,14 +123,17 @@ function read_tsv(file; normalizenames = false, ntasks = effective_ntasks > 1 ) - # Read the TSV file into a DataFrame + # Filter options to remove any set to `nothing` + # clean_options = Dict{Symbol,Any}(filter(p -> !isnothing(p[2]), read_options)) + + # Read the file into a DataFrame if startswith(file, "http://") || startswith(file, "https://") # Fetch the content from the URL response = HTTP.get(file) # Ensure the request was successful if response.status != 200 - error("Failed to fetch the TSV file: HTTP status code ", response.status) + error("Failed to fetch the delim file: HTTP status code ", response.status) end # Read the CSV data from the fetched content using cleaned options @@ -168,33 +146,9 @@ function read_tsv(file; end """ -$docstring_write_tsv -""" -function write_tsv( - x::DataFrame, - file::String; - missingstring::String = "", - append::Bool = false, - col_names::Bool = true, - eol::String = "\n", - num_threads::Int = Threads.nthreads()) - - # Write DataFrame to TSV - CSV.write( - file, - x, - delim = '\t', # Use tab as the delimiter for TSV - append = append, - writeheader = col_names && !append, - missingstring = missingstring, - newline = eol, - threaded = num_threads > 1) -end - -""" -$docstring_read_delim +$docstring_read_tsv """ -function read_delim(file; +function read_tsv(file; delim='\t', col_names=true, skip=0, @@ -205,7 +159,7 @@ function read_delim(file; escape_double=true, ntasks::Int = Threads.nthreads(), # Default ntasks value num_threads::Union{Int, Nothing}=nothing) # Optional num_threads - + # Use num_threads if provided, otherwise stick with ntasks effective_ntasks = isnothing(num_threads) ? ntasks : num_threads @@ -220,9 +174,9 @@ function read_delim(file; delim = delim, header = col_names === true ? 1 : 0, skipto = skipto + 1, - select = col_select, footerskip = 0, limit = limit, + select = col_select, comment = comment, missingstring = missingstring, escapechar = escape_double ? 
'"' : '\\', @@ -230,17 +184,14 @@ function read_delim(file; normalizenames = false, ntasks = effective_ntasks > 1 ) - # Filter options to remove any set to `nothing` - # clean_options = Dict{Symbol,Any}(filter(p -> !isnothing(p[2]), read_options)) - - # Read the file into a DataFrame + # Read the TSV file into a DataFrame if startswith(file, "http://") || startswith(file, "https://") # Fetch the content from the URL response = HTTP.get(file) # Ensure the request was successful if response.status != 200 - error("Failed to fetch the delim file: HTTP status code ", response.status) + error("Failed to fetch the TSV file: HTTP status code ", response.status) end # Read the CSV data from the fetched content using cleaned options @@ -311,6 +262,52 @@ function read_table(file; return df end +""" +$docstring_write_csv +""" +function write_csv( + x::DataFrame, + file::String; + missingstring::String = "NA", + append::Bool = false, + col_names::Bool = true, + eol::String = "\n", + num_threads::Int = Threads.nthreads()) + + # Configure threading + CSV.write( + file, + x, + append = append, + header = col_names && !append, + missingstring = missingstring, + newline = eol, + threaded = num_threads > 1 ) +end + +""" +$docstring_write_tsv +""" +function write_tsv( + x::DataFrame, + file::String; + missingstring::String = "", + append::Bool = false, + col_names::Bool = true, + eol::String = "\n", + num_threads::Int = Threads.nthreads()) + + # Write DataFrame to TSV + CSV.write( + file, + x, + delim = '\t', # Use tab as the delimiter for TSV + append = append, + header = col_names && !append, + missingstring = missingstring, + newline = eol, + threaded = num_threads > 1) +end """ $docstring_write_table @@ -331,7 +328,7 @@ function write_table( x, delim = delim, # Flexible delimiter based on argument append = append, - writeheader = col_names && !append, + header = col_names && !append, missingstring = missingstring, newline = eol, threaded = num_threads > 1) diff --git 
a/src/docstrings.jl b/src/docstrings.jl index ef88e0d..f08637f 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -17,11 +17,15 @@ Reads a CSV file or URL into a DataFrame, with options to specify delimiter, col `escape_double`: Indicates whether to interpret two consecutive quote characters as a single quote in the data. Default is true. `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Defaults to 1 # Examples -```jldoctest -julia> read_csv(joinpath(testing_files_path, "csvtest.csv"), skip = 2, n_max = 3, missingstring = ["95", "Charlie"]) +```jldoctest +julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); + +julia> write_csv(df, "csvtest.csv"); + +julia> read_csv("csvtest.csv", skip = 2, n_max = 3, missingstring = ["95", "Charlie"]) 3×3 DataFrame Row │ ID Name Score - │ Int64 String7 Int64? + │ Int64 String7 Int64? ─────┼───────────────────────── 1 │ 3 missing 77 2 │ 4 David 85 @@ -49,11 +53,15 @@ Reads a TSV file or URL into a DataFrame, with options to specify delimiter, col `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Default is the number of available threads. 
# Examples -```jldoctest -julia> read_tsv(joinpath(testing_files_path, "tsvtest.tsv"), skip = 2, n_max = 3, missingstring = ["Charlie"]) +```jldoctest +julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); + +julia> write_tsv(df, "tsvtest.tsv"); + +julia> read_tsv("tsvtest.tsv", skip = 2, n_max = 3, missingstring = ["Charlie"]) 3×3 DataFrame - Row │ ID Name Score - │ Int64 String7 Int64 + Row │ ID Name Score + │ Int64 String7 Int64 ─────┼─────────────────────── 1 │ 3 missing 77 2 │ 4 David 85 @@ -82,8 +90,12 @@ Reads a delimited file or URL into a DataFrame, with options to specify delimite `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Default is the number of available threads. # Examples -```jldoctest -julia> read_delim(joinpath(testing_files_path, "csvtest.csv"), delim = ",", col_names = false, num_threads = 4) # col_names are false here for the purpose of demonstration +```jldoctest +julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); + +julia> write_csv(df, "csvtest.csv"); + +julia> read_delim("csvtest.csv", delim = ",", col_names = false, num_threads = 4) # col_names are false here for the purpose of demonstration 6×3 DataFrame Row │ Column1 Column2 Column3 │ String3 String7 String7 @@ -97,8 +109,6 @@ julia> read_delim(joinpath(testing_files_path, "csvtest.csv"), delim = ",", col_ ``` """ - - const docstring_read_fwf = """ read_fwf(filepath::String; num_lines::Int=4, col_names=nothing) @@ -112,8 +122,15 @@ Read fixed-width format (FWF) files into a DataFrame. - `skip_to`=0: Number of lines at the beginning of the file to skip before reading data. - `n_max`=nothing: Maximum number of lines to read from the file. If nothing, read all lines. 
# Examples -```jldoctest -julia> path = joinpath(testing_files_path, "fwftest.txt"); +```jldoctest +julia> fwf_data = + "John Smith 35 12345 Software Engineer 120,000 \\nJane Doe 29 2345 Marketing Manager 95,000 \\nAlice Jones 42 123456 CEO 250,000 \\nBob Brown 31 12345 Product Manager 110,000 \\nCharlie Day 28 345 Sales Associate 70,000 \\nDiane Poe 35 23456 Data Scientist 130,000 \\nEve Stone 40 123456 Chief Financial Off 200,000 \\nFrank Moore 33 1234 Graphic Designer 80,000 \\nGrace Lee 27 123456 Software Developer 115,000 \\nHank Zuse 45 12345 System Analyst 120,000 "; + +julia> open("fwftest.txt", "w") do file + write(file, fwf_data) + end; + +julia> path = "fwftest.txt"; julia> read_fwf(path, fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]), skip_to=3, n_max=3) 3×5 DataFrame @@ -126,7 +143,6 @@ julia> read_fwf(path, fwf_empty(path, num_lines=4, col_names = ["Name", "Age", " ``` """ - const docstring_fwf_empty = """ fwf_empty(filepath::String; num_lines::Int=4, col_names=nothing) @@ -142,16 +158,24 @@ num_lines::Int=4: Number of lines to sample from the beginning of the file for a - A vector of integers representing the detected column widths. - A vector of strings representing the column names. 
# Examples -```jldoctest -julia> fwf_empty(joinpath(testing_files_path, "fwftest.txt")) +```jldoctest +julia> fwf_data = + "John Smith 35 12345 Software Engineer 120,000 \\nJane Doe 29 2345 Marketing Manager 95,000 \\nAlice Jones 42 123456 CEO 250,000 \\nBob Brown 31 12345 Product Manager 110,000 \\nCharlie Day 28 345 Sales Associate 70,000 \\nDiane Poe 35 23456 Data Scientist 130,000 \\nEve Stone 40 123456 Chief Financial Off 200,000 \\nFrank Moore 33 1234 Graphic Designer 80,000 \\nGrace Lee 27 123456 Software Developer 115,000 \\nHank Zuse 45 12345 System Analyst 120,000 "; + +julia> open("fwftest.txt", "w") do file + write(file, fwf_data) + end; + +julia> path = "fwftest.txt"; + +julia> fwf_empty(path) ([13, 5, 8, 20, 8], ["Column_1", "Column_2", "Column_3", "Column_4", "Column_5"]) -julia> fwf_empty(joinpath(testing_files_path, "fwftest.txt"), num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]) +julia> fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]) ([13, 5, 8, 20, 8], ["Name", "Age", "ID", "Position", "Salary"]) ``` """ - const docstring_write_csv = """ write_csv(DataFrame, filepath; na = "", append = false, col_names = true, missingstring, eol = "\n", num_threads = Threads.nthreads()) @@ -170,7 +194,7 @@ Write a DataFrame to a CSV (comma-separated values) file. ```jldoctest julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); -julia> write_csv(df, joinpath(testing_files_path, "csvtest.csv")); +julia> write_csv(df, "csvtest.csv"); ``` """ @@ -192,7 +216,7 @@ Write a DataFrame to a TSV (tab-separated values) file. 
```jldoctest julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); -julia> write_tsv(df, joinpath(testing_files_path, "tsvtest.tsv")); +julia> write_tsv(df, "tsvtest.tsv"); ``` """ @@ -213,7 +237,11 @@ Read a table from a file where columns are separated by any amount of whitespace -`kwargs`: Additional keyword arguments passed to CSV.File. # Examples ```jldoctest -julia> read_table(joinpath(testing_files_path, "tabletest.txt"), skip = 2, n_max = 3, col_select = ["Name"]) +julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); + +julia> write_table(df, "tabletest.txt"); + +julia> read_table("tabletest.txt", skip = 2, n_max = 3, col_select = ["Name"]) 3×1 DataFrame Row │ Name │ String7 @@ -225,7 +253,6 @@ julia> read_table(joinpath(testing_files_path, "tabletest.txt"), skip = 2, n_max ``` """ - const docstring_write_table = """ write_table(x, file; delim = '\t', na, append, col_names, eol, num_threads) @@ -245,7 +272,7 @@ Write a DataFrame to a file, allowing for customization of the delimiter and oth ```jldoctest julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); -julia> write_table(df, joinpath(testing_files_path, "tabletest.txt")); +julia> write_table(df, "tabletest.txt"); ``` """ @@ -260,15 +287,23 @@ Read data from an Excel file into a DataFrame. -`range`: Specifies a specific range of cells to be read from the sheet. If nothing, the entire sheet is read. -`col_names`: Indicates whether the first row of the specified range should be treated as column names. If false, columns will be named automatically. -`col_types`: Allows specifying column types explicitly. Can be a single type applied to all columns, a list or a dictionary mapping column names or indices to types. If nothing, types will be inferred. 
--`missingstring`: The string that represents missing values in the Excel file. +-`missingstring`: The value or vector that represents missing values in the Excel file. -`trim_ws`: Whether to trim leading and trailing whitespace from cells in the Excel file. -`skip`: Number of rows to skip at the beginning of the sheet or range before reading data. -`n_max`: The maximum number of rows to read from the sheet or range, after skipping. Inf means read all available rows. -`guess_max`: The maximum number of rows to scan for type guessing and column names detection. Only relevant if col_types is nothing or col_names is true. If nothing, a default heuristic is used. # Examples -```jldoctest -julia> read_xlsx(joinpath(testing_files_path, "xlsxtest.xlsx"), sheet = "REPORT_A", skip = 1, n_max = 4, missingstring = [2]) +```jldoctest +julia> df = DataFrame(integers=[1, 2, 3, 4], + strings=["This", "Package makes", "File reading/writing", "even smoother"], + floats=[10.2, 20.3, 30.4, 40.5]); + +julia> df2 = DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]); + +julia> write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path="xlsxtest.xlsx", overwrite = true); + +julia> read_xlsx("xlsxtest.xlsx", sheet = "REPORT_A", skip = 1, n_max = 4, missingstring = [2]) 3×3 DataFrame Row │ integers strings floats │ Any String Float64 @@ -291,11 +326,13 @@ Write a DataFrame, or multiple DataFrames, to an Excel file. 
# Examples ```jldoctest -julia> df = DataFrame(integers=[1, 2, 3, 4], strings=["This", "Package makes", "File reading/writing", "even smoother"], floats=[10.2, 20.3, 30.4, 40.5]); +julia> df = DataFrame(integers=[1, 2, 3, 4], + strings=["This", "Package makes", "File reading/writing", "even smoother"], + floats=[10.2, 20.3, 30.4, 40.5]); julia> df2 = DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]); -julia> write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path=joinpath(testing_files_path, "xlsxtest.xlsx"), overwrite = true); +julia> write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path="xlsxtest.xlsx", overwrite = true); ``` """ @@ -314,8 +351,12 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf, `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Defaults to 1 # Examples -```jldoctest -julia> read_sas(joinpath(testing_files_path, "test.dta")) +```jldoctest +julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); + +julia> write_dta(df, "test.dta"); + +julia> read_dta("test.dta") 2×2 DataFrame Row │ AA AB │ String3 Float64 @@ -339,25 +380,31 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf, `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. 
Defaults to 1 # Examples -```jldoctest -julia> read_sas(joinpath(testing_files_path, "test.sas7bdat")) +```jldoctest +julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); + +julia> write_sas(df, "test.sas7bdat"); + +julia> read_sas("test.sas7bdat") 2×2 DataFrame Row │ AA AB - │ String Float64 + │ String3 Float64 ─────┼────────────────── 1 │ sav 10.1 2 │ por 10.2 -julia> read_sas(joinpath(testing_files_path, "test.xpt")) +julia> write_sas(df, "test.xpt"); + +julia> read_sas("test.xpt") 2×2 DataFrame Row │ AA AB - │ String Float64 + │ String3 Float64 ─────┼────────────────── 1 │ sav 10.1 2 │ por 10.2 """ -const docstring_read_sav = +const docstring_read_sav = """ function read_sav(data_file; encoding=nothing, col_select=nothing, skip=0, n_max=Inf) Read data from a SPSS (.sav and .por) file into a DataFrame, supporting both local and remote sources. @@ -371,8 +418,12 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf, `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Defaults to 1 # Examples -```jldoctest -julia> read_sav(joinpath(testing_files_path, "test.sav")) +```jldoctest +julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); + +julia> write_sav(df, "test.sav"); + +julia> read_sav("test.sav") 2×2 DataFrame Row │ AA AB │ String Float64 @@ -380,7 +431,9 @@ julia> read_sav(joinpath(testing_files_path, "test.sav")) 1 │ sav 10.1 2 │ por 10.2 -julia> read_sav(joinpath(testing_files_path, "test.por")) +julia> write_sav(df, "test.por"); + +julia> read_sav("test.por") 2×2 DataFrame Row │ AA AB │ String Float64 @@ -403,21 +456,21 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_sav(df , joinpath(testing_files_path, "test.sav")) +julia> write_sav(df, "test.sav") 2×2 ReadStatTable: - Row │ AA AB - │ String Float64 -─────┼───────────────── - 1 │ sav 10.1 - 2 │ por 10.2 + Row │ AA AB + │ String Float64? 
+─────┼────────────────── + 1 │ sav 10.1 + 2 │ por 10.2 -julia> write_sav(df , joinpath(testing_files_path, "test.por")) +julia> write_sav(df, "test.por") 2×2 ReadStatTable: - Row │ AA AB - │ String Float64 -─────┼───────────────── - 1 │ sav 10.1 - 2 │ por 10.2 + Row │ AA AB + │ String Float64? +─────┼────────────────── + 1 │ sav 10.1 + 2 │ por 10.2 ``` """ const docstring_write_sas = @@ -433,21 +486,21 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_sav(df , joinpath(testing_files_path, "test.sas7bdat")) +julia> write_sas(df, "test.sas7bdat") 2×2 ReadStatTable: - Row │ AA AB - │ String Float64 -─────┼───────────────── - 1 │ sav 10.1 - 2 │ por 10.2 + Row │ AA AB + │ String Float64? +─────┼────────────────── + 1 │ sav 10.1 + 2 │ por 10.2 -julia> write_sav(df , joinpath(testing_files_path, "test.xpt")) +julia> write_sas(df, "test.xpt") 2×2 ReadStatTable: - Row │ AA AB - │ String Float64 -─────┼───────────────── - 1 │ sav 10.1 - 2 │ por 10.2 + Row │ AA AB + │ String Float64? +─────┼────────────────── + 1 │ sav 10.1 + 2 │ por 10.2 ``` """ @@ -464,12 +517,12 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_dta(df , joinpath(testing_files_path, "test.dta")) +julia> write_dta(df, "test.dta") 2×2 ReadStatTable: - Row │ AA AB - │ String Float64 -─────┼───────────────── - 1 │ sav 10.1 - 2 │ por 10.2 + Row │ AA AB + │ String Float64? +─────┼────────────────── + 1 │ sav 10.1 + 2 │ por 10.2 ``` """ \ No newline at end of file diff --git a/src/statsfiles.jl b/src/statsfiles.jl index 297035e..2012350 100644 --- a/src/statsfiles.jl +++ b/src/statsfiles.jl @@ -31,7 +31,7 @@ function read_sas(data_file; :row_limit => n_max == Inf ? nothing : n_max, # Convert Inf to nothing for unlimited :row_offset => skip, # Skip the specified number of rows :ntasks => num_threads > 1 ? 
num_threads : nothing, # Use num_threads for parallel reading if > 1 - :convert_datetime => true, # Assuming default behavior is to convert datetime + # :convert_datetime => true, # Assuming default behavior is to convert datetime :apply_value_labels => true, # Apply value labels if available :file_encoding => encoding, # Set file encoding if provided :handler_encoding => encoding != nothing ? encoding : "UTF-8" # Set handler encoding, default to UTF-8 @@ -76,7 +76,7 @@ function read_sav(data_file; :row_limit => n_max == Inf ? nothing : n_max, # Convert Inf to nothing for unlimited :row_offset => skip, # Skip the specified number of rows :ntasks => num_threads > 1 ? num_threads : nothing, # Use num_threads for parallel reading if > 1 - :convert_datetime => true, # Assuming default behavior is to convert datetime + # :convert_datetime => true, # Assuming default behavior is to convert datetime :apply_value_labels => true, # Apply value labels if available :file_encoding => encoding, # Set file encoding if provided :handler_encoding => encoding != nothing ? encoding : "UTF-8" # Set handler encoding, default to UTF-8 @@ -121,7 +121,7 @@ function read_dta(data_file; :row_limit => n_max == Inf ? nothing : n_max, # Convert Inf to nothing for unlimited :row_offset => skip, # Skip the specified number of rows :ntasks => num_threads > 1 ? num_threads : nothing, # Use num_threads for parallel reading if > 1 - :convert_datetime => true, # Assuming default behavior is to convert datetime + # :convert_datetime => true, # Assuming default behavior is to convert datetime :apply_value_labels => true, # Apply value labels if available :file_encoding => encoding, # Set file encoding if provided :handler_encoding => encoding != nothing ? 
encoding : "UTF-8" # Set handler encoding, default to UTF-8 @@ -133,7 +133,6 @@ function read_dta(data_file; return df end - """ $docstring_write_sas """ diff --git a/src/xlfiles.jl b/src/xlfiles.jl index d3fe70a..eeafd75 100644 --- a/src/xlfiles.jl +++ b/src/xlfiles.jl @@ -134,4 +134,4 @@ function write_xlsx(x; path::String, overwrite::Bool=false) else error("Input must be a Pair of a sheet name and a DataFrame or a Tuple of such Pairs for multiple sheets.") end -end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 4be3871..6462a73 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,10 +5,7 @@ using Test using Documenter DocMeta.setdocmeta!(TidierFiles, :DocTestSetup, :(begin - using TidierFiles - # Determine the package root directory dynamically - project_root = dirname(dirname(pathof(TidierFiles))) - testing_files_path = joinpath(project_root, "testing_files") + using DataFrames, TidierFiles end); recursive=true) doctest(TidierFiles) diff --git a/testing_files/test.dta b/testing_files/test.dta index 36a2c0f..8269b14 100644 Binary files a/testing_files/test.dta and b/testing_files/test.dta differ diff --git a/testing_files/test.por b/testing_files/test.por index fa874a4..82c24bd 100644 --- a/testing_files/test.por +++ b/testing_files/test.por @@ -3,7 +3,7 @@ 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrst uvwxyz .<(+|&[]!$*);^-/|,%_>?`:#@'="000000~000000000000000000000{}\0000000000000 -00000000000000000000000000000000000000000000000000000000SPSSPORTA8/202403256/111 -02918/ReadStat317/https://github.com/WizardMac/ReadStat42/51K/73/2/AA1/9/0/1/9/0 +00000000000000000000000000000000000000000000000000000000SPSSPORTA8/202404086/051 +20718/ReadStat317/https://github.com/WizardMac/ReadStat42/51K/73/2/AA1/9/0/1/9/0 /70/2/AB5/9/2/5/9/2/E0/F3/savA.2TTTTTTTTTNL5OO5E86AQKLF4DBN91H8O69O28QFPEJDHOB7F 
/3/porA.5TTTTTTTTTHCBJIASGCLNBD08QNGI34HICJI4HN1KT8R5IMF/ZZZZZZZZZZZZZZZZZZZZZZZ diff --git a/testing_files/test.sas7bdat b/testing_files/test.sas7bdat index 51fbe96..ae26ce8 100644 Binary files a/testing_files/test.sas7bdat and b/testing_files/test.sas7bdat differ diff --git a/testing_files/test.sav b/testing_files/test.sav index e95e0bc..35e3bc0 100644 Binary files a/testing_files/test.sav and b/testing_files/test.sav differ diff --git a/testing_files/test.xpt b/testing_files/test.xpt index 4ea905f..994c99b 100644 Binary files a/testing_files/test.xpt and b/testing_files/test.xpt differ diff --git a/testing_files/xlsxtest.xlsx b/testing_files/xlsxtest.xlsx index f6312fa..355b102 100644 Binary files a/testing_files/xlsxtest.xlsx and b/testing_files/xlsxtest.xlsx differ