diff --git a/.env b/.env
new file mode 100644
index 0000000000..222989c353
--- /dev/null
+++ b/.env
@@ -0,0 +1,4 @@
+REDIS_HOST=redis
+REDIS_PORT=6379
+REDIS_DATABASE=0
+REDIS_PASSWORD=
diff --git a/.gitignore b/.gitignore
index 9c0ccc6751..56245fde97 100644
--- a/.gitignore
+++ b/.gitignore
@@ -73,3 +73,5 @@ target/
 renku-*.bottle.json
 renku-*.bottle.tar.gz
 renku.rb
+
+.env
diff --git a/Dockerfile b/Dockerfile.cli
similarity index 96%
rename from Dockerfile
rename to Dockerfile.cli
index 770a6e39ee..6e63b77185 100644
--- a/Dockerfile
+++ b/Dockerfile.cli
@@ -1,4 +1,4 @@
-FROM python:3.6-alpine as base
+FROM python:3.7-alpine as base
 
 RUN apk add --no-cache git && \
     pip install --no-cache --upgrade pip
diff --git a/Dockerfile.svc b/Dockerfile.svc
new file mode 100644
index 0000000000..ab9f4d06e7
--- /dev/null
+++ b/Dockerfile.svc
@@ -0,0 +1,18 @@
+FROM python:3.7-alpine
+
+RUN apk add --update --no-cache alpine-sdk g++ gcc linux-headers libxslt-dev python3-dev build-base openssl-dev libffi-dev git && \
+    pip install --no-cache --upgrade pip setuptools pipenv requirements-builder
+
+RUN apk add --no-cache --allow-untrusted \
+    --repository http://dl-cdn.alpinelinux.org/alpine/latest-stable/community \
+    --repository http://dl-cdn.alpinelinux.org/alpine/latest-stable/main \
+    --repository http://nl.alpinelinux.org/alpine/edge/community \
+    git-lfs && \
+    git lfs install
+
+COPY . /code/renku
+WORKDIR /code/renku
+RUN requirements-builder -e all --level=pypi setup.py > requirements.txt && pip install -r requirements.txt && pip install -e . && pip install gunicorn
+
+
+ENTRYPOINT ["gunicorn", "renku.service.entrypoint:app", "-b", "0.0.0.0:8080"]
diff --git a/MANIFEST.in b/MANIFEST.in
index ab3e9b5e3c..3edbd959ec 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -17,15 +17,17 @@
 # limitations under the License.
 
 # Check manifest will not automatically add these two files:
+include renku/service/.env-example
 include .dockerignore
 include .editorconfig
 include .tx/config
 include *.md
 prune docs/_build
 recursive-include renku *.po *.pot *.mo
-
+recursive-include renku *.py
 # added by check_manifest.py
 include *.py
+include *.yml
 include *.rst
 include *.sh
 include *.txt
@@ -39,6 +41,7 @@ include babel.ini
 include brew.py
 include pytest.ini
 include snap/snapcraft.yaml
+recursive-include renku *.json
 recursive-include .github CODEOWNERS
 recursive-include .travis *.sh
 recursive-include docs *.bat
@@ -59,3 +62,4 @@ recursive-include renku *.yml
 recursive-include renku Dockerfile
 recursive-include tests *.py *.gz *.yml
 prune .github
+exclude .env
diff --git a/Makefile b/Makefile
index 2602ce90e6..1f07336ea4 100644
--- a/Makefile
+++ b/Makefile
@@ -69,3 +69,6 @@ brew-commit-bottle: *.bottle.json
 
 brew-release:
 	open "https://github.com/SwissDataScienceCenter/renku-python/releases/new?tag=v$(shell brew info --json=v1 renku | jq -r '.[0].versions.stable')"
+
+service-container:
+	docker build -f Dockerfile.svc -t renku-svc .
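Taken together, the files above make the service runnable locally: `make service-container` builds the image from Dockerfile.svc, and `docker-compose up` starts it on port 8080 next to a Redis instance configured through `.env`. As a quick sanity check of a running stack, something like the following snippet should exercise the endpoints introduced later in this diff. This is an illustration, not part of the patch: the host and port assume the docker-compose mapping above, `requests` is an extra dependency, and the token, username, and repository URL are placeholders in the style of the conftest.py fixtures below.

import requests

BASE_URL = 'http://localhost:8080'  # port mapping from docker-compose.yml

# Identity and content-type headers, mirroring the conftest.py fixtures.
headers = {
    'Content-Type': 'application/json',
    'accept': 'application/json',
    'Authorization': 'Bearer <placeholder-token>',
}

# Clone a repository into the service cache.
payload = {
    'git_url': 'https://renkulab.io/gitlab/<owner>/<project>.git',
    'git_username': '<owner>',
    'git_access_token': '<placeholder-token>',
}
response = requests.post(
    '{0}/cache/project-clone'.format(BASE_URL),
    json=payload,
    headers=headers,
)
project_id = response.json()['result']['project_id']

# The cloned project should now appear in the cached-project listing.
projects = requests.get(
    '{0}/cache/project-list'.format(BASE_URL), headers=headers
).json()['result']['projects']
assert project_id in {p['project_id'] for p in projects}

The Swagger UI registered under SWAGGER_URL ('/api/docs', see renku/service/config.py below) is another quick way to poke at these routes once the container is up.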
diff --git a/Pipfile.lock b/Pipfile.lock index 936bc038a6..368c8b5734 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "a3e161caf52b39ed8aa1de4a3306a163ca1043dfa219203a4cc1b11463c9007b" + "sha256": "c61855aa6c4438d0e2af40a860510eadc567849b6b0e0fba0fa1ba76663a89cb" }, "pipfile-spec": 6, "requires": { @@ -30,6 +30,25 @@ ], "version": "==1.5" }, + "apispec": { + "extras": [ + "yaml" + ], + "hashes": [ + "sha256:5fdaa1173b32515cc83f9d413a49a6c37fafc2b87f6b40e95923d3e85f0942c5", + "sha256:9e88c51517a6515612e818459f61c1bc06c00f2313e5187828bdbabaa7461473" + ], + "index": "pypi", + "version": "==3.0.0" + }, + "apispec-webframeworks": { + "hashes": [ + "sha256:02fb79a7e37bc4e71ad21f6a9ddfbfc8e919eede7ef685d35d2d8549c2d0282d", + "sha256:89502de27f87e10766a62c9caf2ce4d33abce3acda91ae50abb3ef4937763b59" + ], + "index": "pypi", + "version": "==0.5.0" + }, "appdirs": { "hashes": [ "sha256:9e5896d1372858f8dd3344faf4e5014d21849c756c8d5701f78f8a103b372d92", @@ -37,13 +56,6 @@ ], "version": "==1.4.3" }, - "asn1crypto": { - "hashes": [ - "sha256:0b199f211ae690df3db4fd6c1c4ff976497fb1da689193e368eedbadc53d9292", - "sha256:bca90060bd995c3f62c4433168eab407e44bdbdb567b3f3a396a676c1a4c4a3f" - ], - "version": "==1.0.1" - }, "atomicwrites": { "hashes": [ "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4", @@ -53,10 +65,10 @@ }, "attrs": { "hashes": [ - "sha256:ec20e7a4825331c1b5ebf261d111e16fa9612c1f7a5e1f884f12bd53a664dfd2", - "sha256:f913492e1663d3c36f502e5e9ba6cd13cf19d7fab50aa13239e420fef95e1396" + "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c", + "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72" ], - "version": "==19.2.0" + "version": "==19.3.0" }, "avro-cwl": { "hashes": [ @@ -94,36 +106,38 @@ }, "cffi": { "hashes": [ - "sha256:041c81822e9f84b1d9c401182e174996f0bae9991f33725d059b771744290774", - "sha256:046ef9a22f5d3eed06334d01b1e836977eeef500d9b78e9ef693f9380ad0b83d", - "sha256:066bc4c7895c91812eff46f4b1c285220947d4aa46fa0a2651ff85f2afae9c90", - "sha256:066c7ff148ae33040c01058662d6752fd73fbc8e64787229ea8498c7d7f4041b", - "sha256:2444d0c61f03dcd26dbf7600cf64354376ee579acad77aef459e34efcb438c63", - "sha256:300832850b8f7967e278870c5d51e3819b9aad8f0a2c8dbe39ab11f119237f45", - "sha256:34c77afe85b6b9e967bd8154e3855e847b70ca42043db6ad17f26899a3df1b25", - "sha256:46de5fa00f7ac09f020729148ff632819649b3e05a007d286242c4882f7b1dc3", - "sha256:4aa8ee7ba27c472d429b980c51e714a24f47ca296d53f4d7868075b175866f4b", - "sha256:4d0004eb4351e35ed950c14c11e734182591465a33e960a4ab5e8d4f04d72647", - "sha256:4e3d3f31a1e202b0f5a35ba3bc4eb41e2fc2b11c1eff38b362de710bcffb5016", - "sha256:50bec6d35e6b1aaeb17f7c4e2b9374ebf95a8975d57863546fa83e8d31bdb8c4", - "sha256:55cad9a6df1e2a1d62063f79d0881a414a906a6962bc160ac968cc03ed3efcfb", - "sha256:5662ad4e4e84f1eaa8efce5da695c5d2e229c563f9d5ce5b0113f71321bcf753", - "sha256:59b4dc008f98fc6ee2bb4fd7fc786a8d70000d058c2bbe2698275bc53a8d3fa7", - "sha256:73e1ffefe05e4ccd7bcea61af76f36077b914f92b76f95ccf00b0c1b9186f3f9", - "sha256:a1f0fd46eba2d71ce1589f7e50a9e2ffaeb739fb2c11e8192aa2b45d5f6cc41f", - "sha256:a2e85dc204556657661051ff4bab75a84e968669765c8a2cd425918699c3d0e8", - "sha256:a5457d47dfff24882a21492e5815f891c0ca35fefae8aa742c6c263dac16ef1f", - "sha256:a8dccd61d52a8dae4a825cdbb7735da530179fea472903eb871a5513b5abbfdc", - "sha256:ae61af521ed676cf16ae94f30fe202781a38d7178b6b4ab622e4eec8cefaff42", - "sha256:b012a5edb48288f77a63dba0840c92d0504aa215612da4541b7b42d849bc83a3", - 
"sha256:d2c5cfa536227f57f97c92ac30c8109688ace8fa4ac086d19d0af47d134e2909", - "sha256:d42b5796e20aacc9d15e66befb7a345454eef794fdb0737d1af593447c6c8f45", - "sha256:dee54f5d30d775f525894d67b1495625dd9322945e7fee00731952e0368ff42d", - "sha256:e070535507bd6aa07124258171be2ee8dfc19119c28ca94c9dfb7efd23564512", - "sha256:e1ff2748c84d97b065cc95429814cdba39bcbd77c9c85c89344b317dc0d9cbff", - "sha256:ed851c75d1e0e043cbf5ca9a8e1b13c4c90f3fbd863dacb01c0808e2b5204201" - ], - "version": "==1.12.3" + "sha256:00d890313797d9fe4420506613384b43099ad7d2b905c0752dbcc3a6f14d80fa", + "sha256:0cf9e550ac6c5e57b713437e2f4ac2d7fd0cd10336525a27224f5fc1ec2ee59a", + "sha256:0ea23c9c0cdd6778146a50d867d6405693ac3b80a68829966c98dd5e1bbae400", + "sha256:193697c2918ecdb3865acf6557cddf5076bb39f1f654975e087b67efdff83365", + "sha256:1ae14b542bf3b35e5229439c35653d2ef7d8316c1fffb980f9b7647e544baa98", + "sha256:1e389e069450609c6ffa37f21f40cce36f9be7643bbe5051ab1de99d5a779526", + "sha256:263242b6ace7f9cd4ea401428d2d45066b49a700852334fd55311bde36dcda14", + "sha256:33142ae9807665fa6511cfa9857132b2c3ee6ddffb012b3f0933fc11e1e830d5", + "sha256:364f8404034ae1b232335d8c7f7b57deac566f148f7222cef78cf8ae28ef764e", + "sha256:47368f69fe6529f8f49a5d146ddee713fc9057e31d61e8b6dc86a6a5e38cecc1", + "sha256:4895640844f17bec32943995dc8c96989226974dfeb9dd121cc45d36e0d0c434", + "sha256:558b3afef987cf4b17abd849e7bedf64ee12b28175d564d05b628a0f9355599b", + "sha256:5ba86e1d80d458b338bda676fd9f9d68cb4e7a03819632969cf6d46b01a26730", + "sha256:63424daa6955e6b4c70dc2755897f5be1d719eabe71b2625948b222775ed5c43", + "sha256:6381a7d8b1ebd0bc27c3bc85bc1bfadbb6e6f756b4d4db0aa1425c3719ba26b4", + "sha256:6381ab708158c4e1639da1f2a7679a9bbe3e5a776fc6d1fd808076f0e3145331", + "sha256:6fd58366747debfa5e6163ada468a90788411f10c92597d3b0a912d07e580c36", + "sha256:728ec653964655d65408949b07f9b2219df78badd601d6c49e28d604efe40599", + "sha256:7cfcfda59ef1f95b9f729c56fe8a4041899f96b72685d36ef16a3440a0f85da8", + "sha256:819f8d5197c2684524637f940445c06e003c4a541f9983fd30d6deaa2a5487d8", + "sha256:825ecffd9574557590e3225560a8a9d751f6ffe4a49e3c40918c9969b93395fa", + "sha256:9009e917d8f5ef780c2626e29b6bc126f4cb2a4d43ca67aa2b40f2a5d6385e78", + "sha256:9c77564a51d4d914ed5af096cd9843d90c45b784b511723bd46a8a9d09cf16fc", + "sha256:a19089fa74ed19c4fe96502a291cfdb89223a9705b1d73b3005df4256976142e", + "sha256:a40ed527bffa2b7ebe07acc5a3f782da072e262ca994b4f2085100b5a444bbb2", + "sha256:bb75ba21d5716abc41af16eac1145ab2e471deedde1f22c6f99bd9f995504df0", + "sha256:e22a00c0c81ffcecaf07c2bfb3672fa372c50e2bd1024ffee0da191c1b27fc71", + "sha256:e55b5a746fb77f10c83e8af081979351722f6ea48facea79d470b3731c7b2891", + "sha256:ec2fa3ee81707a5232bf2dfbd6623fdb278e070d596effc7e2d788f2ada71a05", + "sha256:fd82eb4694be712fcae03c717ca2e0fc720657ac226b80bbb597e971fc6928c2" + ], + "version": "==1.13.1" }, "chardet": { "hashes": [ @@ -134,10 +148,10 @@ }, "check-manifest": { "hashes": [ - "sha256:8754cc8efd7c062a3705b442d1c23ff702d4477b41a269c2e354b25e1f5535a4", - "sha256:a4c555f658a7c135b8a22bd26c2e55cfaf5876e4d5962d8c25652f2addd556bc" + "sha256:42de6eaab4ed149e60c9b367ada54f01a3b1e4d6846784f9b9710e770ff5572c", + "sha256:78dd077f2c70dbac7cfcc9d12cbd423914e787ea4b5631de45aecd25b524e8e3" ], - "version": "==0.39" + "version": "==0.40" }, "click": { "hashes": [ @@ -148,9 +162,9 @@ }, "click-completion": { "hashes": [ - "sha256:78072eecd5e25ea0d25ceaf99cd5f22aa2667d67231ae0819deab9b1ff3456fb" + "sha256:5bf816b81367e638a190b6e91b50779007d14301b3f9f3145d68e3cade7bce86" ], - "version": "==0.5.1" + "version": "==0.5.2" }, 
"coverage": { "hashes": [ @@ -191,24 +205,29 @@ }, "cryptography": { "hashes": [ - "sha256:24b61e5fcb506424d3ec4e18bca995833839bf13c59fc43e530e488f28d46b8c", - "sha256:25dd1581a183e9e7a806fe0543f485103232f940fcfc301db65e630512cce643", - "sha256:3452bba7c21c69f2df772762be0066c7ed5dc65df494a1d53a58b683a83e1216", - "sha256:41a0be220dd1ed9e998f5891948306eb8c812b512dc398e5a01846d855050799", - "sha256:5751d8a11b956fbfa314f6553d186b94aa70fdb03d8a4d4f1c82dcacf0cbe28a", - "sha256:5f61c7d749048fa6e3322258b4263463bfccefecb0dd731b6561cb617a1d9bb9", - "sha256:72e24c521fa2106f19623a3851e9f89ddfdeb9ac63871c7643790f872a305dfc", - "sha256:7b97ae6ef5cba2e3bb14256625423413d5ce8d1abb91d4f29b6d1a081da765f8", - "sha256:961e886d8a3590fd2c723cf07be14e2a91cf53c25f02435c04d39e90780e3b53", - "sha256:96d8473848e984184b6728e2c9d391482008646276c3ff084a1bd89e15ff53a1", - "sha256:ae536da50c7ad1e002c3eee101871d93abdc90d9c5f651818450a0d3af718609", - "sha256:b0db0cecf396033abb4a93c95d1602f268b3a68bb0a9cc06a7cff587bb9a7292", - "sha256:cfee9164954c186b191b91d4193989ca994703b2fff406f71cf454a2d3c7327e", - "sha256:e6347742ac8f35ded4a46ff835c60e68c22a536a8ae5c4422966d06946b6d4c6", - "sha256:f27d93f0139a3c056172ebb5d4f9056e770fdf0206c2f422ff2ebbad142e09ed", - "sha256:f57b76e46a58b63d1c6375017f4564a28f19a5ca912691fd2e4261b3414b618d" - ], - "version": "==2.7" + "sha256:02079a6addc7b5140ba0825f542c0869ff4df9a69c360e339ecead5baefa843c", + "sha256:1df22371fbf2004c6f64e927668734070a8953362cd8370ddd336774d6743595", + "sha256:369d2346db5934345787451504853ad9d342d7f721ae82d098083e1f49a582ad", + "sha256:3cda1f0ed8747339bbdf71b9f38ca74c7b592f24f65cdb3ab3765e4b02871651", + "sha256:44ff04138935882fef7c686878e1c8fd80a723161ad6a98da31e14b7553170c2", + "sha256:4b1030728872c59687badcca1e225a9103440e467c17d6d1730ab3d2d64bfeff", + "sha256:58363dbd966afb4f89b3b11dfb8ff200058fbc3b947507675c19ceb46104b48d", + "sha256:6ec280fb24d27e3d97aa731e16207d58bd8ae94ef6eab97249a2afe4ba643d42", + "sha256:7270a6c29199adc1297776937a05b59720e8a782531f1f122f2eb8467f9aab4d", + "sha256:73fd30c57fa2d0a1d7a49c561c40c2f79c7d6c374cc7750e9ac7c99176f6428e", + "sha256:7f09806ed4fbea8f51585231ba742b58cbcfbfe823ea197d8c89a5e433c7e912", + "sha256:90df0cc93e1f8d2fba8365fb59a858f51a11a394d64dbf3ef844f783844cc793", + "sha256:971221ed40f058f5662a604bd1ae6e4521d84e6cad0b7b170564cc34169c8f13", + "sha256:a518c153a2b5ed6b8cc03f7ae79d5ffad7315ad4569b2d5333a13c38d64bd8d7", + "sha256:b0de590a8b0979649ebeef8bb9f54394d3a41f66c5584fff4220901739b6b2f0", + "sha256:b43f53f29816ba1db8525f006fa6f49292e9b029554b3eb56a189a70f2a40879", + "sha256:d31402aad60ed889c7e57934a03477b572a03af7794fa8fb1780f21ea8f6551f", + "sha256:de96157ec73458a7f14e3d26f17f8128c959084931e8997b9e655a39c8fde9f9", + "sha256:df6b4dca2e11865e6cfbfb708e800efb18370f5a46fd601d3755bc7f85b3a8a2", + "sha256:ecadccc7ba52193963c0475ac9f6fa28ac01e01349a2ca48509667ef41ffd2cf", + "sha256:fb81c17e0ebe3358486cd8cc3ad78adbae58af12fc2bf2bc0bb84e8090fa5ce8" + ], + "version": "==2.8" }, "cwlref-runner": { "hashes": [ @@ -274,6 +293,20 @@ ], "version": "==3.7.8" }, + "flask": { + "hashes": [ + "sha256:13f9f196f330c7c2c5d7a5cf91af894110ca0215ac051b5844701f2bfd934d52", + "sha256:45eb5a6fd193d6cf7e0cf5d8a5b31f83d5faae0293695626f539a823e93b13f6" + ], + "version": "==1.1.1" + }, + "flask-swagger-ui": { + "hashes": [ + "sha256:3282c770764c8053360f33b2fc120e1d169ecca2138537d0e6e1135b1f9d4ff2" + ], + "index": "pypi", + "version": "==3.20.9" + }, "freezegun": { "hashes": [ "sha256:2a4d9c8cd3c04a201e20c313caf8b6338f1cfa4cda43f46a94cc4a9fd13ea5e7", @@ -298,10 
+331,10 @@ }, "gitpython": { "hashes": [ - "sha256:631263cc670aa56ce3d3c414cf0fe2e840f2e913514b138ea28d88a477bbcd21", - "sha256:6e97b9f0954807f30c2dd8e3165731ed6c477a1b365f194b69d81d7940a08332" + "sha256:3237caca1139d0a7aa072f6735f5fd2520de52195e0fa1d8b83a9b212a2498b2", + "sha256:a7d6bef0775f66ba47f25911d285bcd692ce9053837ff48a120c2b8cf3a71389" ], - "version": "==3.0.3" + "version": "==3.0.4" }, "idna": { "hashes": [ @@ -340,6 +373,13 @@ ], "version": "==4.3.4" }, + "itsdangerous": { + "hashes": [ + "sha256:321b033d07f2a4136d3ec762eac9f16a10ccd60f53c0c91af90217ace7ba1f19", + "sha256:b12271b2047cb23eeb98c8b5622e2e5c5e9abd9784a153e9d8ef9cb4dd09d749" + ], + "version": "==1.1.0" + }, "jinja2": { "hashes": [ "sha256:74320bb91f31270f9551d46522e33af46a80c3d619f4a4bf42b3164d30b5911f", @@ -437,9 +477,10 @@ }, "mypy-extensions": { "hashes": [ - "sha256:a161e3b917053de87dbe469987e173e49fb454eca10ef28b48b384538cc11458" + "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", + "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8" ], - "version": "==0.4.2" + "version": "==0.4.3" }, "ndg-httpsclient": { "hashes": [ @@ -451,9 +492,10 @@ }, "networkx": { "hashes": [ - "sha256:8311ddef63cf5c5c5e7c1d0212dd141d9a1fe3f474915281b73597ed5f1d4e3d" + "sha256:cdfbf698749a5014bf2ed9db4a07a5295df1d3a53bf80bf3cbd61edf9df05fa1", + "sha256:f8f4ff0b6f96e4f9b16af6b84622597b5334bf9cae8cf9b2e42e7985d5c95c64" ], - "version": "==2.3" + "version": "==2.4" }, "packaging": { "hashes": [ @@ -552,6 +594,14 @@ ], "version": "==2.4.2" }, + "pyjwt": { + "hashes": [ + "sha256:5c6eca3c2940464d106b99ba83b00c6add741c9becaec087fb7ccdefea71350e", + "sha256:8d59a976fb773f3e6a39c85636357c4f0e242707394cadadd9814f5cbaa20e96" + ], + "index": "pypi", + "version": "==1.7.1" + }, "pyld": { "hashes": [ "sha256:ce6d9cd065fb3a390ec65e665dcb3655ed2aa07431d98e201ea3bc99f56a8bfb" @@ -753,10 +803,10 @@ }, "sentry-sdk": { "hashes": [ - "sha256:15e51e74b924180c98bcd636cb4634945b0a99a124d50b433c3a9dc6a582e8db", - "sha256:1d6a2ee908ec6d8f96c27d78bc39e203df4d586d287c233140af7d8d1aca108a" + "sha256:7d8668f082cb1eb9bf1e0d3f8f9bd5796d05d927c1197af226d044ed32b9815f", + "sha256:ff14935cc3053de0650128f124c36f34a4be120b8cc522c149f5cba342c1fd05" ], - "version": "==0.12.3" + "version": "==0.13.0" }, "setuptools-scm": { "hashes": [ diff --git a/conftest.py b/conftest.py index a0e4fa49bd..483d2b6dbb 100644 --- a/conftest.py +++ b/conftest.py @@ -25,13 +25,19 @@ import tempfile import time import urllib +import uuid from pathlib import Path +import fakeredis import pytest import responses import yaml from click.testing import CliRunner +from renku.core.utils.contexts import chdir +from renku.service.cache import ServiceCache +from renku.service.entrypoint import create_app + @pytest.fixture(scope='module') def renku_path(tmpdir_factory): @@ -510,3 +516,173 @@ def remote_project(data_repository, directory_tree): assert 0 == result.exit_code yield runner, project_path + + +@pytest.fixture(scope='function') +def dummy_datapack(): + """Creates dummy data folder.""" + temp_dir = tempfile.TemporaryDirectory() + + data_file_txt = Path(temp_dir.name) / Path('file.txt') + data_file_txt.write_text('my awesome data') + + data_file_csv = Path(temp_dir.name) / Path('file.csv') + data_file_csv.write_text('more,awesome,data') + + yield temp_dir + + +@pytest.fixture(scope='function') +def datapack_zip(dummy_datapack): + """Returns dummy data folder as a zip archive.""" + workspace_dir = tempfile.TemporaryDirectory() + with 
chdir(workspace_dir.name): + shutil.make_archive('datapack', 'zip', dummy_datapack.name) + + yield Path(workspace_dir.name) / 'datapack.zip' + + +@pytest.fixture(scope='function') +def datapack_tar(dummy_datapack): + """Returns dummy data folder as a tar archive.""" + workspace_dir = tempfile.TemporaryDirectory() + with chdir(workspace_dir.name): + shutil.make_archive('datapack', 'tar', dummy_datapack.name) + + yield Path(workspace_dir.name) / 'datapack.tar' + + +@pytest.fixture(scope='function') +def mock_redis(monkeypatch): + """Monkey patch service cache with mocked redis.""" + with monkeypatch.context() as m: + m.setattr(ServiceCache, 'cache', fakeredis.FakeRedis()) + yield + + +@pytest.fixture(scope='function') +def svc_client(mock_redis): + """Renku service client.""" + flask_app = create_app() + + testing_client = flask_app.test_client() + testing_client.testing = True + + ctx = flask_app.app_context() + ctx.push() + + yield testing_client + + ctx.pop() + + +@pytest.fixture(scope='function') +def svc_client_with_repo(svc_client, mock_redis): + """Renku service remote repository.""" + remote_url = 'https://renkulab.io/gitlab/contact/integration-tests.git' + headers = { + 'Content-Type': 'application/json', + 'accept': 'application/json', + 'Authorization': 'Bearer b4b4de0eda0f471ab82702bd5c367fa7', + } + + payload = { + 'git_url': remote_url, + 'git_username': 'contact', + 'git_access_token': 'EcfPJvEqjJepyu6XyqKZ', + } + + response = svc_client.post( + '/cache/project-clone', + data=json.dumps(payload), + headers=headers, + ) + + assert response + assert 'result' in response.json + assert 'error' not in response.json + project_id = response.json['result']['project_id'] + assert isinstance(uuid.UUID(project_id), uuid.UUID) + + yield svc_client, headers, project_id + + +@pytest.fixture( + params=[ + { + 'url': '/cache/files-list', + 'allowed_method': 'GET', + 'headers': { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + }, + { + 'url': '/cache/files-upload', + 'allowed_method': 'POST', + 'headers': {} + }, + { + 'url': '/cache/project-clone', + 'allowed_method': 'POST', + 'headers': { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + }, + { + 'url': '/cache/project-list', + 'allowed_method': 'GET', + 'headers': { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + }, + { + 'url': '/datasets/add', + 'allowed_method': 'POST', + 'headers': { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + }, + { + 'url': '/datasets/create', + 'allowed_method': 'POST', + 'headers': { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + }, + { + 'url': '/datasets/files-list', + 'allowed_method': 'GET', + 'headers': { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + }, + { + 'url': '/datasets/list', + 'allowed_method': 'GET', + 'headers': { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + }, + ] +) +def service_allowed_endpoint(request, svc_client, mock_redis): + """Ensure allowed methods and correct headers.""" + methods = { + 'GET': svc_client.get, + 'POST': svc_client.post, + 'HEAD': svc_client.head, + 'PUT': svc_client.put, + 'DELETE': svc_client.delete, + 'OPTIONS': svc_client.options, + 'TRACE': svc_client.trace, + 'PATCH': svc_client.patch, + } + + yield methods, request.param, svc_client diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000000..96436d1b24 --- /dev/null +++ 
b/docker-compose.yml @@ -0,0 +1,13 @@ +version: '3' + +services: + redis: + image: redis:5.0.3-alpine + ports: + - "6379:6379" + + renku-svc: + image: renku-svc:latest + env_file: .env + ports: + - "8080:8080" diff --git a/renku/cli/__init__.py b/renku/cli/__init__.py index 3a33d746d6..e862182ef5 100644 --- a/renku/cli/__init__.py +++ b/renku/cli/__init__.py @@ -90,7 +90,7 @@ option_use_external_storage from renku.core.commands.version import check_version, print_version from renku.core.management.client import LocalClient -from renku.core.management.config import ConfigManagerMixin, RENKU_HOME +from renku.core.management.config import RENKU_HOME, ConfigManagerMixin from renku.core.management.repository import default_path #: Monkeypatch Click application. diff --git a/renku/core/commands/client.py b/renku/core/commands/client.py index 12d103fae0..f9eec0d0e2 100644 --- a/renku/core/commands/client.py +++ b/renku/core/commands/client.py @@ -25,6 +25,8 @@ import yaml from renku.core.management import LocalClient +from renku.core.management.config import RENKU_HOME +from renku.core.management.repository import default_path from .git import get_git_isolation @@ -63,8 +65,17 @@ def pass_local_client( ) def new_func(*args, **kwargs): - ctx = click.get_current_context() - client = ctx.ensure_object(LocalClient) + ctx = click.get_current_context(silent=True) + if not ctx: + client = LocalClient( + path=default_path(), + renku_home=RENKU_HOME, + use_external_storage=True, + ) + ctx = click.Context(click.Command(method)) + else: + client = ctx.ensure_object(LocalClient) + stack = contextlib.ExitStack() # Handle --isolation option: @@ -85,8 +96,11 @@ def new_func(*args, **kwargs): if lock or (lock is None and commit): stack.enter_context(client.lock) - with stack: - result = ctx.invoke(method, client, *args, **kwargs) + result = None + if ctx: + with stack: + result = ctx.invoke(method, client, *args, **kwargs) + return result return functools.update_wrapper(new_func, method) diff --git a/renku/core/commands/dataset.py b/renku/core/commands/dataset.py index 5712e1b01e..929ee544af 100644 --- a/renku/core/commands/dataset.py +++ b/renku/core/commands/dataset.py @@ -143,9 +143,12 @@ def add_file( destination='', ref=None, with_metadata=None, - urlscontext=contextlib.nullcontext + urlscontext=contextlib.nullcontext, + use_external_storage=False ): """Add data file to a dataset.""" + client.use_external_storage = use_external_storage + add_to_dataset( client, urls, name, link, force, create, sources, destination, ref, with_metadata, urlscontext diff --git a/renku/core/management/repository.py b/renku/core/management/repository.py index a937390738..114081ed40 100644 --- a/renku/core/management/repository.py +++ b/renku/core/management/repository.py @@ -47,13 +47,18 @@ def default_path(): return '.' 
+def path_converter(path): + """Converter for path in PathMixin.""" + return Path(path).resolve() + + @attr.s class PathMixin: """Define a default path attribute.""" path = attr.ib( default=default_path, - converter=lambda arg: Path(arg).resolve().absolute(), + converter=path_converter, ) @path.validator diff --git a/renku/core/utils/contexts.py b/renku/core/utils/contexts.py index 77de0bc61b..06131ade45 100644 --- a/renku/core/utils/contexts.py +++ b/renku/core/utils/contexts.py @@ -26,6 +26,9 @@ @contextlib.contextmanager def chdir(path): """Change the current working directory.""" + if isinstance(path, Path): + path = str(path) + cwd = os.getcwd() os.chdir(path) try: diff --git a/renku/service/.env-example b/renku/service/.env-example new file mode 100644 index 0000000000..06099fe73c --- /dev/null +++ b/renku/service/.env-example @@ -0,0 +1,6 @@ +REDIS_HOST=redis +REDIS_PORT=6379 +REDIS_DATABASE=0 +REDIS_PASSWORD= + +CACHE_DIR= diff --git a/renku/service/__init__.py b/renku/service/__init__.py new file mode 100644 index 0000000000..1928b35350 --- /dev/null +++ b/renku/service/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service.""" diff --git a/renku/service/cache/__init__.py b/renku/service/cache/__init__.py new file mode 100644 index 0000000000..ca18ae78ec --- /dev/null +++ b/renku/service/cache/__init__.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Renku service cache management for files.""" +from renku.service.cache.files import FileManagementCache +from renku.service.cache.projects import ProjectManagementCache + + +class ServiceCache(FileManagementCache, ProjectManagementCache): + """Service cache manager.""" + + pass diff --git a/renku/service/cache/base.py b/renku/service/cache/base.py new file mode 100644 index 0000000000..44984391ca --- /dev/null +++ b/renku/service/cache/base.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service cache management.""" +import json + +import redis +from redis import RedisError + +from renku.service.cache.config import REDIS_DATABASE, REDIS_HOST, \ + REDIS_PASSWORD, REDIS_PORT + + +class BaseCache: + """Cache management.""" + + cache = redis.Redis( + host=REDIS_HOST, + port=REDIS_PORT, + db=REDIS_DATABASE, + password=REDIS_PASSWORD + ) + + def set_record(self, name, key, value): + """Insert a record to hash set.""" + if isinstance(value, dict): + value = json.dumps(value) + + self.cache.hset(name, key, value) + + def invalidate_key(self, name, key): + """Invalidate cache `key` in users hash set.""" + try: + self.cache.hdel(name, key) + except RedisError: + pass + + def get_record(self, name, key): + """Return record values from hash set.""" + result = self.cache.hget(name, key) + if result: + return json.loads(result) + + def get_all_records(self, name): + """Return all record values from hash set.""" + return [ + json.loads(record) for record in self.cache.hgetall(name).values() + ] diff --git a/renku/service/cache/config.py b/renku/service/cache/config.py new file mode 100644 index 0000000000..7afb2d6b68 --- /dev/null +++ b/renku/service/cache/config.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Renku service cache configuration.""" +import os + +REDIS_HOST = os.getenv('REDIS_HOST', '0.0.0.0') +REDIS_PORT = int(os.getenv('REDIS_PORT', 6379)) +REDIS_DATABASE = int(os.getenv('REDIS_DATABASE', 0)) +REDIS_PASSWORD = os.getenv('REDIS_PASSWORD') diff --git a/renku/service/cache/files.py b/renku/service/cache/files.py new file mode 100644 index 0000000000..6c013d2350 --- /dev/null +++ b/renku/service/cache/files.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service files cache management.""" +from renku.service.cache.base import BaseCache + + +class FileManagementCache(BaseCache): + """File management cache.""" + + FILES_SUFFIX = 'files' + + def files_cache_key(self, user): + """Construct cache key based on user and files prefix.""" + return '{0}_{1}'.format(user, self.FILES_SUFFIX) + + def set_file(self, user, file_id, metadata): + """Cache file metadata under user hash set.""" + self.set_record(self.files_cache_key(user), file_id, metadata) + + def set_files(self, user, files): + """Cache a list of metadata files under user hash set.""" + for file_ in files: + self.set_file(user, file_['file_id'], file_) + + def get_files(self, user): + """Get all user cached files.""" + return self.get_all_records(self.files_cache_key(user)) + + def get_file(self, user, file_id): + """Get user cached file.""" + result = self.get_record(self.files_cache_key(user), file_id) + return result + + def invalidate_file(self, user, file_id): + """Remove file record from hash set.""" + self.invalidate_key(self.files_cache_key(user), file_id) diff --git a/renku/service/cache/projects.py b/renku/service/cache/projects.py new file mode 100644 index 0000000000..4f966d3cee --- /dev/null +++ b/renku/service/cache/projects.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Renku service project cache management.""" +from renku.service.cache.base import BaseCache + + +class ProjectManagementCache(BaseCache): + """Project management cache.""" + + PROJECTS_SUFFIX = 'projects' + + def projects_cache_key(self, user): + """Construct cache key based on user and projects prefix.""" + return '{0}_{1}'.format(user, self.PROJECTS_SUFFIX) + + def set_project(self, user, project_id, metadata): + """Cache project metadata under user hash set.""" + self.set_record(self.projects_cache_key(user), project_id, metadata) + + def get_projects(self, user): + """Get all user cache projects.""" + return self.get_all_records(self.projects_cache_key(user)) + + def get_project(self, user, project_id): + """Get user cached project.""" + result = self.get_record(self.projects_cache_key(user), project_id) + return result + + def invalidate_project(self, user, project_id): + """Remove project record from hash set.""" + self.invalidate_key(self.projects_cache_key(user), project_id) diff --git a/renku/service/config.py b/renku/service/config.py new file mode 100644 index 0000000000..d43b352a80 --- /dev/null +++ b/renku/service/config.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service config.""" +import os +import tempfile +from pathlib import Path + +GIT_ACCESS_DENIED_ERROR_CODE = -32000 +GIT_UNKNOWN_ERROR_CODE = -32001 + +RENKU_EXCEPTION_ERROR_CODE = -32100 +REDIS_EXCEPTION_ERROR_CODE = -32200 + +INVALID_HEADERS_ERROR_CODE = -32601 +INVALID_PARAMS_ERROR_CODE = -32602 +INTERNAL_FAILURE_ERROR_CODE = -32603 + +SERVICE_NAME = 'Renku Service' +OPENAPI_VERSION = '2.0' +API_VERSION = 'v1' + +SWAGGER_URL = '/api/docs' +API_SPEC_URL = os.getenv( + 'RENKU_SVC_SWAGGER_URL', '/api/{0}/spec'.format(API_VERSION) +) + +CACHE_DIR = os.getenv('CACHE_DIR', tempfile.TemporaryDirectory().name) +CACHE_UPLOADS_PATH = Path(CACHE_DIR) / Path('uploads') +CACHE_UPLOADS_PATH.mkdir(parents=True, exist_ok=True) + +CACHE_PROJECTS_PATH = Path(CACHE_DIR) / Path('projects') +CACHE_PROJECTS_PATH.mkdir(parents=True, exist_ok=True) + +TAR_ARCHIVE_CONTENT_TYPE = 'application/x-tar' +ZIP_ARCHIVE_CONTENT_TYPE = 'application/zip' + +SUPPORTED_ARCHIVES = [ + TAR_ARCHIVE_CONTENT_TYPE, + ZIP_ARCHIVE_CONTENT_TYPE, +] diff --git a/renku/service/entrypoint.py b/renku/service/entrypoint.py new file mode 100644 index 0000000000..7f645c9e90 --- /dev/null +++ b/renku/service/entrypoint.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service entry point.""" +import os +import uuid + +from apispec import APISpec +from apispec.ext.marshmallow import MarshmallowPlugin +from flask import Flask +from flask_apispec import FlaskApiSpec +from flask_swagger_ui import get_swaggerui_blueprint + +from renku.service.cache import ServiceCache +from renku.service.config import API_SPEC_URL, API_VERSION, CACHE_DIR, \ + CACHE_PROJECTS_PATH, CACHE_UPLOADS_PATH, OPENAPI_VERSION, SERVICE_NAME, \ + SWAGGER_URL +from renku.service.views.cache import CACHE_BLUEPRINT_TAG, cache_blueprint, \ + list_projects_view, list_uploaded_files_view, project_clone, \ + upload_file_view +from renku.service.views.datasets import DATASET_BLUEPRINT_TAG, \ + add_file_to_dataset_view, create_dataset_view, dataset_blueprint, \ + list_dataset_files_view, list_datasets_view + + +def make_cache(): + """Create cache structure.""" + sub_dirs = [CACHE_UPLOADS_PATH, CACHE_PROJECTS_PATH] + + for subdir in sub_dirs: + if not subdir.exists(): + subdir.mkdir() + + return ServiceCache() + + +def create_app(): + """Creates a Flask app with necessary configuration.""" + app = Flask(__name__) + app.secret_key = os.getenv('RENKU_SVC_SERVICE_KEY', uuid.uuid4().hex) + + app.config['UPLOAD_FOLDER'] = CACHE_DIR + app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 + + cache = make_cache() + app.config['cache'] = cache + + build_routes(app) + return app + + +def build_routes(app): + """Register routes to given app instance.""" + app.config.update({ + 'APISPEC_SPEC': + APISpec( + title=SERVICE_NAME, + openapi_version=OPENAPI_VERSION, + version=API_VERSION, + plugins=[MarshmallowPlugin()], + ), + 'APISPEC_SWAGGER_URL': API_SPEC_URL, + }) + app.register_blueprint(cache_blueprint) + app.register_blueprint(dataset_blueprint) + + swaggerui_blueprint = get_swaggerui_blueprint( + SWAGGER_URL, API_SPEC_URL, config={'app_name': 'Renku Service'} + ) + app.register_blueprint(swaggerui_blueprint, url_prefix=SWAGGER_URL) + + docs = FlaskApiSpec(app) + + docs.register(upload_file_view, blueprint=CACHE_BLUEPRINT_TAG) + docs.register(list_uploaded_files_view, blueprint=CACHE_BLUEPRINT_TAG) + docs.register(project_clone, blueprint=CACHE_BLUEPRINT_TAG) + docs.register(list_projects_view, blueprint=CACHE_BLUEPRINT_TAG) + + docs.register(create_dataset_view, blueprint=DATASET_BLUEPRINT_TAG) + docs.register(add_file_to_dataset_view, blueprint=DATASET_BLUEPRINT_TAG) + docs.register(list_datasets_view, blueprint=DATASET_BLUEPRINT_TAG) + docs.register(list_dataset_files_view, blueprint=DATASET_BLUEPRINT_TAG) + + +app = create_app() + +if __name__ == '__main__': + app.run() diff --git a/renku/service/serializers/__init__.py b/renku/service/serializers/__init__.py new file mode 100644 index 0000000000..362f6221d1 --- /dev/null +++ b/renku/service/serializers/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service serializers.""" diff --git a/renku/service/serializers/cache.py b/renku/service/serializers/cache.py new file mode 100644 index 0000000000..ef339fea7e --- /dev/null +++ b/renku/service/serializers/cache.py @@ -0,0 +1,164 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service cache serializers.""" +import time +import uuid +from urllib.parse import urlparse + +from marshmallow import Schema, ValidationError, fields, post_load, pre_load, \ + validates +from werkzeug.utils import secure_filename + +from renku.core.errors import ConfigurationError +from renku.core.models.git import GitURL +from renku.service.serializers.rpc import JsonRPCResponse + + +def extract_file(request): + """Extract file from Flask request. 
+
+    :raises: `ValidationError`
+    """
+    files = request.files
+    if 'file' not in files:
+        raise ValidationError('missing key: file')
+
+    file = files['file']
+    if file and not file.filename:
+        raise ValidationError('wrong filename: {0}'.format(file.filename))
+
+    if file:
+        file.filename = secure_filename(file.filename)
+        return file
+
+
+class FileUploadRequest(Schema):
+    """Request schema for file upload."""
+
+    override_existing = fields.Boolean(missing=False)
+    unpack_archive = fields.Boolean(missing=False)
+
+
+class FileUploadDetails(Schema):
+    """Details schema for file upload."""
+
+    file_id = fields.String(missing=lambda: uuid.uuid4().hex)
+    timestamp = fields.Integer(missing=lambda: int(time.time() * 1e+3))
+
+    content_type = fields.String(missing='unknown')
+    file_name = fields.String(required=True)
+    file_size = fields.Integer(required=True)
+    relative_path = fields.String(required=True)
+    is_archive = fields.Boolean(missing=False)
+    unpack_archive = fields.Boolean(missing=False)
+
+
+class FileUploadResponse(Schema):
+    """Response schema for file upload."""
+
+    files = fields.List(fields.Nested(FileUploadDetails), required=True)
+
+
+class FileUploadResponseRPC(JsonRPCResponse):
+    """RPC response schema for file upload response."""
+
+    result = fields.Nested(FileUploadResponse)
+
+
+class FileListResponse(Schema):
+    """Response schema for files listing."""
+
+    files = fields.List(fields.Nested(FileUploadDetails), required=True)
+
+
+class FileListResponseRPC(JsonRPCResponse):
+    """RPC response schema for files listing."""
+
+    result = fields.Nested(FileListResponse)
+
+
+class ProjectCloneRequest(Schema):
+    """Request schema for project clone."""
+
+    git_url = fields.String(required=True)
+    git_username = fields.String(required=True)
+    git_access_token = fields.String(required=True)
+
+
+class ProjectCloneDetails(ProjectCloneRequest):
+    """Details schema for project clone."""
+
+    project_id = fields.String(missing=lambda: uuid.uuid4().hex)
+    name = fields.String(required=True)
+    owner = fields.String(required=True)
+
+    @validates('git_url')
+    def validate_git_url(self, value):
+        """Validates git url."""
+        try:
+            GitURL.parse(value)
+        except ConfigurationError as e:
+            raise ValidationError(str(e))
+
+        return value
+
+    @post_load()
+    def format_url(self, data, **kwargs):
+        """Format URL with username and password."""
+        git_url = urlparse(data['git_url'])
+        url = '{0}:{1}@{2}'.format(
+            data['git_username'], data['git_access_token'], git_url.netloc
+        )
+
+        data['url_with_auth'] = git_url._replace(netloc=url).geturl()
+        return data
+
+    @pre_load()
+    def set_owner_name(self, data, **kwargs):
+        """Set owner and name fields."""
+        git_url = GitURL.parse(data['git_url'])
+
+        data['owner'] = git_url.owner
+        data['name'] = git_url.name
+
+        return data
+
+
+class ProjectCloneResponse(Schema):
+    """Response schema for project clone."""
+
+    project_id = fields.String(required=True)
+    git_url = fields.String(required=True)
+
+
+class ProjectCloneResponseRPC(JsonRPCResponse):
+    """RPC response schema for project clone response."""
+
+    result = fields.Nested(ProjectCloneResponse)
+
+
+class ProjectListResponse(Schema):
+    """Response schema for project listing."""
+
+    projects = fields.List(fields.Nested(ProjectCloneResponse), required=True)
+
+
+class ProjectListResponseRPC(JsonRPCResponse):
+    """RPC response schema for project listing."""
+
+    result = fields.Nested(ProjectListResponse)
diff --git a/renku/service/serializers/datasets.py b/renku/service/serializers/datasets.py
new file mode 100644
index 0000000000..68ca48d9db
--- /dev/null
+++ b/renku/service/serializers/datasets.py
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019 - Swiss Data Science Center (SDSC)
+# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
+# Eidgenössische Technische Hochschule Zürich (ETHZ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Renku service datasets serializers."""
+from marshmallow import Schema, fields
+
+from renku.service.serializers.rpc import JsonRPCResponse
+
+
+class DatasetCreateRequest(Schema):
+    """Request schema for dataset create view."""
+
+    dataset_name = fields.String(required=True)
+    project_id = fields.String(required=True)
+
+
+class DatasetCreateResponse(Schema):
+    """Response schema for dataset create view."""
+
+    dataset_name = fields.String(required=True)
+
+
+class DatasetCreateResponseRPC(JsonRPCResponse):
+    """RPC response schema for dataset create view."""
+
+    result = fields.Nested(DatasetCreateResponse)
+
+
+class DatasetAddRequest(Schema):
+    """Request schema for dataset add file view."""
+
+    dataset_name = fields.String(required=True)
+    project_id = fields.String(required=True)
+    file_id = fields.String(required=True)
+
+
+class DatasetAddResponse(Schema):
+    """Response schema for dataset add file view."""
+
+    dataset_name = fields.String(required=True)
+    project_id = fields.String(required=True)
+    file_id = fields.String(required=True)
+
+
+class DatasetAddResponseRPC(JsonRPCResponse):
+    """RPC schema for dataset add."""
+
+    result = fields.Nested(DatasetAddResponse)
+
+
+class DatasetListRequest(Schema):
+    """Request schema for dataset list view."""
+
+    project_id = fields.String(required=True)
+
+
+class DatasetDetails(Schema):
+    """Serialize dataset to response object."""
+
+    identifier = fields.String(required=True)
+    name = fields.String(required=True)
+    version = fields.String(allow_none=True)
+    created = fields.String(required=True)
+
+
+class DatasetListResponse(Schema):
+    """Response schema for dataset list view."""
+
+    datasets = fields.List(fields.Nested(DatasetDetails), required=True)
+
+
+class DatasetListResponseRPC(JsonRPCResponse):
+    """RPC response schema for dataset list view."""
+
+    result = fields.Nested(DatasetListResponse)
+
+
+class DatasetFilesListRequest(Schema):
+    """Request schema for dataset files list view."""
+
+    project_id = fields.String(required=True)
+    dataset_name = fields.String(required=True)
+
+
+class DatasetFileDetails(Schema):
+    """Serialize dataset files to response object."""
+
+    name = fields.String(required=True)
+
+
+class DatasetFilesListResponse(Schema):
+    """Response schema for dataset files list view."""
+
+    dataset_name = fields.String(required=True)
+    files = fields.List(fields.Nested(DatasetFileDetails), required=True)
+
+
+class DatasetFilesListResponseRPC(JsonRPCResponse):
+    """RPC schema for dataset files list view."""
+
+    result = fields.Nested(DatasetFilesListResponse)
diff --git a/renku/service/serializers/rpc.py b/renku/service/serializers/rpc.py
new file mode 100644
index 0000000000..6512d26001 --- /dev/null +++ b/renku/service/serializers/rpc.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service JSON-RPC serializers.""" +from marshmallow import Schema, fields + + +class JsonRPCResponse(Schema): + """JsonRPC response schema.""" + + error = fields.Dict() diff --git a/renku/service/utils/__init__.py b/renku/service/utils/__init__.py new file mode 100644 index 0000000000..23bb0efc4f --- /dev/null +++ b/renku/service/utils/__init__.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service utility functions.""" +from git import Repo + +from renku.service.config import CACHE_PROJECTS_PATH, CACHE_UPLOADS_PATH + + +def make_project_path(user, project): + """Construct full path for cached project.""" + if user and project and 'owner' in project and 'name' in project: + return CACHE_PROJECTS_PATH / user / project['owner'] / project['name'] + + +def make_file_path(user, cached_file): + """Construct full path for cache file.""" + if user and cached_file and 'file_name' in cached_file: + return CACHE_UPLOADS_PATH / user / cached_file['relative_path'] + + +def repo_sync(repo_path, remote_names=('origin', )): + """Sync the repo with the remotes.""" + repo = Repo(repo_path) + is_pushed = False + + for remote in repo.remotes: + if remote.name in remote_names: + remote.push() + is_pushed = True + + return is_pushed diff --git a/renku/service/views/__init__.py b/renku/service/views/__init__.py new file mode 100644 index 0000000000..fbe49ab1d7 --- /dev/null +++ b/renku/service/views/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service views.""" diff --git a/renku/service/views/cache.py b/renku/service/views/cache.py new file mode 100644 index 0000000000..cef7e74251 --- /dev/null +++ b/renku/service/views/cache.py @@ -0,0 +1,216 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service cache views.""" +import os +import shutil +from pathlib import Path + +import patoolib +from flask import Blueprint, jsonify, request +from flask_apispec import marshal_with, use_kwargs +from git import Repo +from marshmallow import EXCLUDE +from patoolib.util import PatoolError + +from renku.service.config import CACHE_UPLOADS_PATH, \ + INVALID_PARAMS_ERROR_CODE, SUPPORTED_ARCHIVES +from renku.service.serializers.cache import FileListResponse, \ + FileListResponseRPC, FileUploadDetails, FileUploadRequest, \ + FileUploadResponse, FileUploadResponseRPC, ProjectCloneDetails, \ + ProjectCloneRequest, ProjectCloneResponse, ProjectCloneResponseRPC, \ + ProjectListResponse, ProjectListResponseRPC, extract_file +from renku.service.utils import make_file_path, make_project_path +from renku.service.views.decorators import accepts_json, handle_base_except, \ + handle_git_except, handle_renku_except, handle_validation_except, \ + header_doc, requires_cache, requires_identity + +CACHE_BLUEPRINT_TAG = 'cache' +cache_blueprint = Blueprint('cache', __name__) + + +@marshal_with(FileListResponseRPC) +@header_doc(description='List uploaded files.', tags=(CACHE_BLUEPRINT_TAG, )) +@cache_blueprint.route( + '/cache/files-list', + methods=['GET'], + provide_automatic_options=False, +) +@handle_base_except +@handle_git_except +@handle_renku_except +@handle_validation_except +@requires_cache +@requires_identity +def list_uploaded_files_view(user, cache): + """List uploaded files ready to be added to projects.""" + files = [ + f for f in cache.get_files(user) if make_file_path(user, f).exists() + ] + + response = FileListResponse().load({'files': files}) + return jsonify(FileListResponseRPC().load({'result': response})) + + +@use_kwargs(FileUploadRequest) +@marshal_with(FileUploadResponseRPC) +@header_doc( + description='Upload file or archive of files.', + tags=(CACHE_BLUEPRINT_TAG, ), +) +@cache_blueprint.route( + '/cache/files-upload', + methods=['POST'], + provide_automatic_options=False, +) +@handle_base_except +@handle_git_except +@handle_renku_except +@handle_validation_except +@requires_cache +@requires_identity 
+def upload_file_view(user, cache): + """Upload file or archive of files.""" + file = extract_file(request) + + response_builder = { + 'file_name': file.filename, + 'content_type': file.content_type, + 'is_archive': file.content_type in SUPPORTED_ARCHIVES + } + response_builder.update(FileUploadRequest().load(request.args)) + + user_cache_dir = CACHE_UPLOADS_PATH / user + user_cache_dir.mkdir(exist_ok=True) + + file_path = user_cache_dir / file.filename + if file_path.exists(): + if response_builder.get('override_existing', False): + file_path.unlink() + else: + return jsonify( + error={ + 'code': INVALID_PARAMS_ERROR_CODE, + 'reason': 'file exists', + } + ) + + file.save(str(file_path)) + + files = [] + if response_builder['unpack_archive'] and response_builder['is_archive']: + unpack_dir = '{0}.unpacked'.format(file_path.name) + temp_dir = file_path.parent / Path(unpack_dir) + if temp_dir.exists(): + shutil.rmtree(temp_dir) + temp_dir.mkdir(exist_ok=True) + + try: + patoolib.extract_archive(str(file_path), outdir=str(temp_dir)) + except PatoolError: + return jsonify( + error={ + 'code': INVALID_PARAMS_ERROR_CODE, + 'reason': 'unable to unpack archive' + } + ) + + for file_ in temp_dir.glob('**/*'): + file_obj = { + 'file_name': file_.name, + 'file_size': os.stat(file_path).st_size, + 'relative_path': + str(file_.relative_to(CACHE_UPLOADS_PATH / user)) + } + + files.append(FileUploadDetails().load(file_obj, unknown=EXCLUDE)) + + else: + response_builder['file_size'] = os.stat(file_path).st_size + response_builder['relative_path'] = str( + file_path.relative_to(CACHE_UPLOADS_PATH / user) + ) + files.append( + FileUploadDetails().load(response_builder, unknown=EXCLUDE) + ) + + response = FileUploadResponse().load({'files': files}) + cache.set_files(user, files) + + return jsonify(FileUploadResponseRPC().load({'result': response})) + + +@use_kwargs(ProjectCloneRequest) +@marshal_with(ProjectCloneResponseRPC) +@header_doc( + 'Clone a remote project. 
If the project is cached already, ' + 'new clone operation will override the old cache state.', + tags=(CACHE_BLUEPRINT_TAG, ) +) +@cache_blueprint.route( + '/cache/project-clone', + methods=['POST'], + provide_automatic_options=False, +) +@handle_base_except +@handle_git_except +@handle_renku_except +@handle_validation_except +@requires_cache +@requires_identity +@accepts_json +def project_clone(user, cache): + """Clone a remote repository.""" + ctx = ProjectCloneDetails().load(request.json) + local_path = make_project_path(user, ctx) + if local_path.exists(): + shutil.rmtree(local_path) + + local_path.mkdir(parents=True, exist_ok=True) + Repo.clone_from(ctx['url_with_auth'], str(local_path)) + cache.set_project(user, ctx['project_id'], ctx) + + response = ProjectCloneResponse().load(ctx, unknown=EXCLUDE) + return jsonify(ProjectCloneResponseRPC().load({'result': response})) + + +@marshal_with(ProjectListResponseRPC) +@header_doc( + 'List cached projects.', + tags=(CACHE_BLUEPRINT_TAG, ), +) +@cache_blueprint.route( + '/cache/project-list', + methods=['GET'], + provide_automatic_options=False, +) +@handle_base_except +@handle_git_except +@handle_renku_except +@handle_validation_except +@requires_cache +@requires_identity +def list_projects_view(user, cache): + """List cached projects.""" + projects = cache.get_projects(user) + projects = [ + ProjectCloneResponse().load(p, unknown=EXCLUDE) + for p in projects if make_project_path(user, p).exists() + ] + + response = ProjectListResponse().load({'projects': projects}) + return jsonify(ProjectListResponseRPC().load({'result': response})) diff --git a/renku/service/views/datasets.py b/renku/service/views/datasets.py new file mode 100644 index 0000000000..25f7176fa6 --- /dev/null +++ b/renku/service/views/datasets.py @@ -0,0 +1,232 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
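+# The views below mirror the cache views' JSON-RPC-style envelope: success
+# responses carry a ``result`` key, failures an ``error`` object with
+# ``code`` and ``reason``, and both are returned with HTTP status 200.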
+"""Renku service datasets view.""" +import json + +from flask import Blueprint, jsonify, request +from flask_apispec import marshal_with, use_kwargs +from marshmallow import EXCLUDE + +from renku.core.commands.dataset import add_file, create_dataset, \ + dataset_parent, list_files +from renku.core.utils.contexts import chdir +from renku.service.config import INTERNAL_FAILURE_ERROR_CODE, \ + INVALID_PARAMS_ERROR_CODE +from renku.service.serializers.datasets import DatasetAddRequest, \ + DatasetAddResponse, DatasetAddResponseRPC, DatasetCreateRequest, \ + DatasetCreateResponse, DatasetCreateResponseRPC, DatasetDetails, \ + DatasetFileDetails, DatasetFilesListRequest, DatasetFilesListResponse, \ + DatasetFilesListResponseRPC, DatasetListRequest, DatasetListResponse, \ + DatasetListResponseRPC +from renku.service.utils import make_file_path, make_project_path, repo_sync +from renku.service.views.decorators import accepts_json, handle_base_except, \ + handle_git_except, handle_renku_except, handle_validation_except, \ + header_doc, requires_cache, requires_identity + +DATASET_BLUEPRINT_TAG = 'datasets' +dataset_blueprint = Blueprint(DATASET_BLUEPRINT_TAG, __name__) + + +@use_kwargs(DatasetListRequest) +@marshal_with(DatasetListResponseRPC) +@header_doc('List all datasets in project.', tags=(DATASET_BLUEPRINT_TAG, )) +@dataset_blueprint.route( + '/datasets/list', + methods=['GET'], + provide_automatic_options=False, +) +@handle_base_except +@handle_git_except +@handle_renku_except +@handle_validation_except +@requires_cache +@requires_identity +def list_datasets_view(user, cache): + """List all datasets in project.""" + req = DatasetListRequest().load(request.args) + project = cache.get_project(user, req['project_id']) + project_path = make_project_path(user, project) + + if not project_path: + return jsonify( + error={ + 'code': INVALID_PARAMS_ERROR_CODE, + 'reason': 'invalid project_id argument', + } + ) + + with chdir(project_path): + datasets = [ + DatasetDetails().load(ds, unknown=EXCLUDE) + # TODO: fix core interface to address this issue (add ticket ref) + for ds in json.loads(dataset_parent(None, 'data', 'json-ld')) + ] + + response = DatasetListResponse().load({'datasets': datasets}) + return jsonify(DatasetListResponseRPC().load({'result': response})) + + +@use_kwargs(DatasetFilesListRequest) +@marshal_with(DatasetFilesListResponseRPC) +@header_doc('List files in a dataset.', tags=(DATASET_BLUEPRINT_TAG, )) +@dataset_blueprint.route( + '/datasets/files-list', + methods=['GET'], + provide_automatic_options=False, +) +@handle_base_except +@handle_git_except +@handle_renku_except +@handle_validation_except +@requires_cache +@requires_identity +def list_dataset_files_view(user, cache): + """List files in a dataset.""" + ctx = DatasetFilesListRequest().load(request.args) + project = cache.get_project(user, ctx['project_id']) + project_path = make_project_path(user, project) + + if not project_path: + return jsonify( + error={ + 'code': INVALID_PARAMS_ERROR_CODE, + 'reason': 'invalid project_id argument', + } + ) + + with chdir(project_path): + dataset_files = json.loads( + # TODO: fix core interface to address this issue (add ticket ref) + list_files(ctx['dataset_name'], None, None, None, 'json-ld') + ) + ctx['files'] = [ + DatasetFileDetails().load(ds, unknown=EXCLUDE) + for ds in dataset_files + ] + + response = DatasetFilesListResponse().load(ctx, unknown=EXCLUDE) + return jsonify(DatasetFilesListResponseRPC().load({'result': response})) + + +@use_kwargs(DatasetAddRequest) 
+@marshal_with(DatasetAddResponseRPC)
+@header_doc(
+    'Add uploaded file to cloned repository.', tags=(DATASET_BLUEPRINT_TAG, )
+)
+@dataset_blueprint.route(
+    '/datasets/add',
+    methods=['POST'],
+    provide_automatic_options=False,
+)
+@handle_base_except
+@handle_git_except
+@handle_renku_except
+@handle_validation_except
+@accepts_json
+@requires_cache
+@requires_identity
+def add_file_to_dataset_view(user, cache):
+    """Add uploaded file to cloned repository."""
+    ctx = DatasetAddRequest().load(request.json)
+    project = cache.get_project(user, ctx['project_id'])
+    file = cache.get_file(user, ctx['file_id'])
+
+    local_path = make_file_path(user, file)
+    if not local_path or not local_path.exists():
+        return jsonify(
+            error={
+                'code': INVALID_PARAMS_ERROR_CODE,
+                'reason': 'invalid file_id argument',
+            }
+        )
+
+    project_path = make_project_path(user, project)
+    if not project_path:
+        return jsonify(
+            error={
+                'code': INVALID_PARAMS_ERROR_CODE,
+                'reason': 'invalid project_id argument',
+            }
+        )
+
+    with chdir(project_path):
+        add_file([str(local_path)],
+                 ctx['dataset_name'],
+                 use_external_storage=False)
+        if not repo_sync(project_path):
+            return jsonify(
+                error={
+                    'code': INTERNAL_FAILURE_ERROR_CODE,
+                    'reason': 'remote name not found',
+                }
+            )
+
+    return jsonify(
+        DatasetAddResponseRPC().load({
+            'result': DatasetAddResponse().load(ctx, unknown=EXCLUDE)
+        })
+    )
+
+
+@use_kwargs(DatasetCreateRequest)
+@marshal_with(DatasetCreateResponseRPC)
+@header_doc(
+    'Create a new dataset in a project.', tags=(DATASET_BLUEPRINT_TAG, )
+)
+@dataset_blueprint.route(
+    '/datasets/create',
+    methods=['POST'],
+    provide_automatic_options=False,
+)
+@handle_base_except
+@handle_git_except
+@handle_renku_except
+@handle_validation_except
+@accepts_json
+@requires_cache
+@requires_identity
+def create_dataset_view(user, cache):
+    """Create a new dataset in a project."""
+    ctx = DatasetCreateRequest().load(request.json)
+    project = cache.get_project(user, ctx['project_id'])
+
+    project_path = make_project_path(user, project)
+    if not project_path:
+        return jsonify(
+            error={
+                'code': INVALID_PARAMS_ERROR_CODE,
+                'reason': 'invalid project_id argument',
+            }
+        )
+
+    with chdir(project_path):
+        create_dataset(ctx['dataset_name'])
+
+    if not repo_sync(project_path):
+        return jsonify(
+            error={
+                'code': INTERNAL_FAILURE_ERROR_CODE,
+                'reason': 'push to remote failed - try again',
+            }
+        )
+
+    return jsonify(
+        DatasetCreateResponseRPC().load({
+            'result': DatasetCreateResponse().load(ctx, unknown=EXCLUDE)
+        })
+    )
diff --git a/renku/service/views/decorators.py b/renku/service/views/decorators.py
new file mode 100644
index 0000000000..66be0c4950
--- /dev/null
+++ b/renku/service/views/decorators.py
@@ -0,0 +1,219 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019 - Swiss Data Science Center (SDSC)
+# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
+# Eidgenössische Technische Hochschule Zürich (ETHZ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
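+# Decorators in this module are meant to be stacked on service views: the
+# ``handle_*`` wrappers translate exceptions into the service's error
+# envelope, while ``requires_cache`` and ``requires_identity`` inject the
+# cache object and the sanitized user identifier as positional arguments.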
+"""Renku service view decorators.""" +from functools import wraps + +from flask import current_app, jsonify, request +from flask_apispec import doc +from git import GitCommandError +from marshmallow import ValidationError +from redis import RedisError +from werkzeug.utils import secure_filename + +from renku.core.errors import RenkuException +from renku.service.config import GIT_ACCESS_DENIED_ERROR_CODE, \ + GIT_UNKNOWN_ERROR_CODE, INTERNAL_FAILURE_ERROR_CODE, \ + INVALID_HEADERS_ERROR_CODE, INVALID_PARAMS_ERROR_CODE, \ + REDIS_EXCEPTION_ERROR_CODE, RENKU_EXCEPTION_ERROR_CODE + + +def requires_identity(f): + """Wrapper which indicates that route requires user identification.""" + # noqa + @wraps(f) + def decorated_function(*args, **kws): + """Represents decorated function.""" + user = request.headers.get('Authorization', '').split(' ') + if user and not user[0]: + return jsonify( + error={ + 'code': INVALID_HEADERS_ERROR_CODE, + 'reason': 'user identification is missing' + } + ) + + return f(secure_filename(user[-1]), *args, **kws) + + return decorated_function + + +def handle_redis_except(f): + """Wrapper which handles Redis exceptions.""" + # nowa + @wraps(f) + def decorated_function(*args, **kwargs): + """Represents decorated function.""" + try: + return f(*args, **kwargs) + except (RedisError, OSError) as e: + error_code = REDIS_EXCEPTION_ERROR_CODE + + return jsonify(error={ + 'code': error_code, + 'reason': e.messages, + }) + + return decorated_function + + +@handle_redis_except +def requires_cache(f): + """Wrapper which injects cache object into view.""" + # noqa + @wraps(f) + def decorated_function(*args, **kwargs): + """Represents decorated function.""" + return f(current_app.config.get('cache'), *args, **kwargs) + + return decorated_function + + +def handle_validation_except(f): + """Wrapper which handles marshmallow `ValidationError`.""" + # nowa + @wraps(f) + def decorated_function(*args, **kwargs): + """Represents decorated function.""" + try: + return f(*args, **kwargs) + except ValidationError as e: + return jsonify( + error={ + 'code': INVALID_PARAMS_ERROR_CODE, + 'reason': e.messages, + } + ) + + return decorated_function + + +def handle_renku_except(f): + """Wrapper which handles `RenkuException`.""" + # nowa + @wraps(f) + def decorated_function(*args, **kwargs): + """Represents decorated function.""" + try: + return f(*args, **kwargs) + except RenkuException as e: + return jsonify( + error={ + 'code': RENKU_EXCEPTION_ERROR_CODE, + 'reason': str(e), + } + ) + + return decorated_function + + +def handle_git_except(f): + """Wrapper which handles `RenkuException`.""" + # nowa + @wraps(f) + def decorated_function(*args, **kwargs): + """Represents decorated function.""" + try: + return f(*args, **kwargs) + except GitCommandError as e: + + error_code = GIT_ACCESS_DENIED_ERROR_CODE \ + if 'Access denied' in e.stderr else GIT_UNKNOWN_ERROR_CODE + + return jsonify( + error={ + 'code': error_code, + 'reason': + 'git error: {0}'. 
+                        format(' '.join(e.stderr.strip().split('\n'))),
+                }
+            )
+
+    return decorated_function
+
+
+def accepts_json(f):
+    """Wrapper which ensures only JSON payload can be in request."""
+    # noqa
+    @wraps(f)
+    def decorated_function(*args, **kwargs):
+        """Represents decorated function."""
+        if 'Content-Type' not in request.headers:
+            return jsonify(
+                error={
+                    'code': INVALID_HEADERS_ERROR_CODE,
+                    'reason': 'invalid request headers'
+                }
+            )
+
+        header_check = request.headers['Content-Type'] == 'application/json'
+
+        if not request.is_json or not header_check:
+            return jsonify(
+                error={
+                    'code': INVALID_HEADERS_ERROR_CODE,
+                    'reason': 'invalid request payload'
+                }
+            )
+
+        return f(*args, **kwargs)
+
+    return decorated_function
+
+
+def handle_base_except(f):
+    """Wrapper which handles base exceptions."""
+    # noqa
+    @wraps(f)
+    def decorated_function(*args, **kwargs):
+        """Represents decorated function."""
+        try:
+            return f(*args, **kwargs)
+        except Exception as e:
+            return jsonify(
+                error={
+                    'code': INTERNAL_FAILURE_ERROR_CODE,
+                    'reason': 'internal error: {0}'.format(str(e)),
+                }
+            )
+
+    return decorated_function
+
+
+def header_doc(description, tags=()):
+    """Wrap additional OpenAPI header description for an endpoint."""
+    return doc(
+        description=description,
+        params={
+            'Authorization': {
+                'description': (
+                    'Authorization HTTP header '
+                    'used for identification of the users. '
+                    'For example: '
+                    '```Authorization: Bearer asdf.qwer.zxcv```'
+                ),
+                'in': 'header',
+                'type': 'string',
+                'required': True
+            }
+        },
+        tags=list(tags),
+    )
diff --git a/setup.py b/setup.py
index 304b36d25a..7b24b380ba 100644
--- a/setup.py
+++ b/setup.py
@@ -27,6 +27,7 @@
 tests_require = [
     'check-manifest>=0.37',
     'coverage>=4.5.3',
+    'fakeredis==1.0.5',
     'flake8>=3.5',
     'freezegun>=0.3.12',
     'isort==4.3.4',
@@ -73,6 +74,7 @@
 install_requires = [
     'appdirs>=1.4.3',
+    'apispec==3.0.0',
     'attrs>=18.2.0',
     'click-completion>=0.5.0',
     'click>=7.0',
@@ -80,6 +82,9 @@
     'cwltool==1.0.20181012180214',
     'environ_config>=18.2.0',
     'filelock>=3.0.0',
+    'flask==1.1.1',
+    'flask-apispec==0.8.3',
+    'flask-swagger-ui==3.20.9',
     'gitpython==3.0.3',
     'patool>=1.12',
     'psutil>=5.4.7',
@@ -89,9 +94,11 @@
     'pyOpenSSL>=19.0.0',
     'python-dateutil>=2.6.1',
     'python-editor>=1.0.4',
+    'redis==3.3.11',
     'rdflib-jsonld>=0.4.0',
     'requests>=2.21.0',
     'ndg-httpsclient>=0.5.1',
+    'marshmallow==3.2.2',
     'idna>=2.8',
     'setuptools_scm>=3.1.0',
     'tabulate>=0.7.7',
diff --git a/tests/service/test_cache_views.py b/tests/service/test_cache_views.py
new file mode 100644
index 0000000000..1080bcef82
--- /dev/null
+++ b/tests/service/test_cache_views.py
@@ -0,0 +1,539 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019 - Swiss Data Science Center (SDSC)
+# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
+# Eidgenössische Technische Hochschule Zürich (ETHZ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
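+# Tests marked ``integration`` clone from a live GitLab on renkulab.io, while
+# plain ``service`` tests only exercise the Flask test client; the
+# ``svc_client*`` and ``datapack_*`` fixtures are presumed to come from the
+# test suite's conftest.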
+"""Renku service cache view tests.""" +import io +import json +import uuid + +import pytest + +from renku.core.models.git import GitURL +from renku.service.config import GIT_ACCESS_DENIED_ERROR_CODE, \ + INVALID_HEADERS_ERROR_CODE, INVALID_PARAMS_ERROR_CODE + + +@pytest.mark.service +def test_serve_api_spec(svc_client): + """Check serving of service spec.""" + headers = { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + response = svc_client.get('/api/v1/spec', headers=headers) + + assert 0 != len(response.json.keys()) + assert 200 == response.status_code + + +@pytest.mark.service +def test_list_upload_files_all(svc_client): + """Check list uploaded files view.""" + headers_user = { + 'Content-Type': 'application/json', + 'accept': 'application/json', + 'Authorization': 'bearer user' + } + response = svc_client.get('/cache/files-list', headers=headers_user) + + assert {'result'} == set(response.json.keys()) + + assert 0 == len(response.json['result']['files']) + assert 200 == response.status_code + + +@pytest.mark.service +def test_list_upload_files_all_no_auth(svc_client): + """Check error response on list uploaded files view.""" + headers = { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + response = svc_client.get( + '/cache/files-list', + headers=headers, + ) + + assert 200 == response.status_code + + assert {'error'} == set(response.json.keys()) + assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code'] + + +@pytest.mark.service +def test_file_upload(svc_client): + """Check successful file upload.""" + headers_user = {'Authorization': 'bearer {0}'.format(uuid.uuid4().hex)} + + response = svc_client.post( + '/cache/files-upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile.txt'), ), + headers=headers_user, + ) + + assert response + assert 200 == response.status_code + + assert {'result'} == set(response.json.keys()) + assert isinstance( + uuid.UUID(response.json['result']['files'][0]['file_id']), uuid.UUID + ) + + +@pytest.mark.service +def test_file_upload_override(svc_client): + """Check successful file upload.""" + headers_user = { + 'Authorization': 'bearer {0}'.format(uuid.uuid4().hex), + } + + response = svc_client.post( + '/cache/files-upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile.txt'), ), + headers=headers_user, + ) + + assert response + assert 200 == response.status_code + + assert {'result'} == set(response.json.keys()) + assert isinstance( + uuid.UUID(response.json['result']['files'][0]['file_id']), uuid.UUID + ) + old_file_id = response.json['result']['files'][0]['file_id'] + + response = svc_client.post( + '/cache/files-upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile.txt'), ), + headers=headers_user, + ) + + assert response + assert 200 == response.status_code + + assert {'error'} == set(response.json.keys()) + assert INVALID_PARAMS_ERROR_CODE == response.json['error']['code'] + assert 'file exists' == response.json['error']['reason'] + + response = svc_client.post( + '/cache/files-upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile.txt'), ), + query_string={'override_existing': True}, + headers=headers_user, + ) + + assert response + assert 200 == response.status_code + + assert {'result'} == set(response.json.keys()) + assert isinstance( + uuid.UUID(response.json['result']['files'][0]['file_id']), uuid.UUID + ) + assert old_file_id != response.json['result']['files'][0]['file_id'] + + +@pytest.mark.service +def 
test_file_upload_same_file(svc_client): + """Check successful file upload with same file uploaded twice.""" + headers_user1 = { + 'Authorization': 'bearer {0}'.format(uuid.uuid4().hex), + } + response = svc_client.post( + '/cache/files-upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile.txt'), ), + headers=headers_user1, + ) + + assert response + assert 200 == response.status_code + + assert {'result'} == set(response.json.keys()) + + assert isinstance( + uuid.UUID(response.json['result']['files'][0]['file_id']), uuid.UUID + ) + + response = svc_client.post( + '/cache/files-upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile.txt'), ), + headers=headers_user1, + ) + + assert response + assert 200 == response.status_code + assert {'error'} == set(response.json.keys()) + assert INVALID_PARAMS_ERROR_CODE == response.json['error']['code'] + assert 'file exists' == response.json['error']['reason'] + + +@pytest.mark.service +def test_file_upload_no_auth(svc_client): + """Check failed file upload.""" + response = svc_client.post( + '/cache/files-upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile.txt'), ), + ) + + assert response + assert 200 == response.status_code + + assert {'error'} == set(response.json.keys()) + assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code'] + + +@pytest.mark.service +def test_file_upload_with_users(svc_client): + """Check successful file upload and listing based on user auth header.""" + headers_user1 = { + 'Authorization': 'bearer {0}'.format(uuid.uuid4().hex), + } + headers_user2 = {'Authorization': 'bearer {0}'.format(uuid.uuid4().hex)} + + response = svc_client.post( + '/cache/files-upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile1.txt'), ), + headers=headers_user1 + ) + + assert {'result'} == set(response.json.keys()) + + file_id = response.json['result']['files'][0]['file_id'] + assert file_id + assert 200 == response.status_code + + response = svc_client.post( + '/cache/files-upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile1.txt'), ), + headers=headers_user2 + ) + + assert response + assert {'result'} == set(response.json.keys()) + + response = svc_client.get('/cache/files-list', headers=headers_user1) + + assert response + + assert {'result'} == set(response.json.keys()) + assert 1 == len(response.json['result']['files']) + + file = response.json['result']['files'][0] + assert file_id == file['file_id'] + assert 0 < file['file_size'] + + +@pytest.mark.service +@pytest.mark.integration +def test_clone_projects_no_auth(svc_client): + """Check error on cloning of remote repository.""" + remote_url = 'https://renkulab.io/gitlab/contact/integration-tests.git' + + payload = { + 'git_url': remote_url, + 'git_username': 'contact', + 'git_access_token': 'notatoken', + } + + response = svc_client.post( + '/cache/project-clone', data=json.dumps(payload) + ) + + assert {'error'} == set(response.json.keys()) + assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code'] + assert 'user identification is missing' == response.json['error']['reason'] + + headers = { + 'Content-Type': 'application/json', + 'accept': 'application/json', + 'Authorization': 'bearer {0}'.format(uuid.uuid4().hex), + } + + response = svc_client.post( + '/cache/project-clone', data=json.dumps(payload), headers=headers + ) + + assert {'error'} == set(response.json.keys()) + assert GIT_ACCESS_DENIED_ERROR_CODE == response.json['error']['code'] + assert 'git error' in 
response.json['error']['reason'] + assert 'Access denied' in response.json['error']['reason'] + + +@pytest.mark.service +@pytest.mark.integration +def test_clone_projects_with_auth(svc_client): + """Check cloning of remote repository.""" + remote_url = 'https://renkulab.io/gitlab/contact/integration-tests.git' + headers = { + 'Content-Type': 'application/json', + 'accept': 'application/json', + 'Authorization': 'bearer {0}'.format(uuid.uuid4().hex), + } + + payload = { + 'git_username': 'contact', + 'git_access_token': 'EcfPJvEqjJepyu6XyqKZ', + 'git_url': remote_url + } + + response = svc_client.post( + '/cache/project-clone', data=json.dumps(payload), headers=headers + ) + + assert response + assert {'result'} == set(response.json.keys()) + + +@pytest.mark.service +@pytest.mark.integration +def test_clone_projects_list_view_errors(svc_client): + """Check cache state of cloned projects with no headers.""" + remote_url = 'https://renkulab.io/gitlab/contact/integration-tests.git' + headers = { + 'Content-Type': 'application/json', + 'accept': 'application/json', + 'Authorization': 'bearer {0}'.format(uuid.uuid4().hex), + } + + payload = { + 'git_url': remote_url, + 'git_username': 'contact', + 'git_access_token': 'EcfPJvEqjJepyu6XyqKZ', + } + + response = svc_client.post( + '/cache/project-clone', data=json.dumps(payload), headers=headers + ) + assert response + assert {'result'} == set(response.json.keys()) + + assert isinstance( + uuid.UUID(response.json['result']['project_id']), uuid.UUID + ) + + response = svc_client.get( + '/cache/project-list', + # no auth headers, expected error + ) + assert response + + assert {'error'} == set(response.json.keys()) + assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code'] + + response = svc_client.get('/cache/project-list', headers=headers) + assert response + assert {'result'} == set(response.json.keys()) + assert 1 == len(response.json['result']['projects']) + + project = response.json['result']['projects'][0] + assert isinstance(uuid.UUID(project['project_id']), uuid.UUID) + assert isinstance(GitURL.parse(project['git_url']), GitURL) + + +@pytest.mark.service +@pytest.mark.integration +def test_clone_projects_invalid_auth(svc_client): + """Check cache state of cloned projects with invalid auth.""" + remote_url = 'https://renkulab.io/gitlab/contact/integration-tests.git' + headers = { + 'Content-Type': 'application/json', + 'accept': 'application/json', + 'Authorization': 'bearer b4b4de0eda0f471ab82702bd5c367fa7', + } + + payload = { + 'git_url': remote_url, + 'git_username': 'notsam', + 'git_access_token': 'notvalidtoken', + } + + response = svc_client.post( + '/cache/project-clone', + data=json.dumps(payload), + headers=headers, + ) + assert response + + assert {'error'} == set(response.json.keys()) + assert GIT_ACCESS_DENIED_ERROR_CODE == response.json['error']['code'] + + response = svc_client.get( + '/cache/project-list', + # no auth headers, expected error + ) + + assert response + assert {'error'} == set(response.json.keys()) + assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code'] + + response = svc_client.get('/cache/project-list', headers=headers) + assert response + assert {'result'} == set(response.json.keys()) + assert 0 == len(response.json['result']['projects']) + + +@pytest.mark.service +def test_upload_zip_unpack_archive(datapack_zip, svc_client_with_repo): + """Upload zip archive with unpack.""" + svc_client, headers, project_id = svc_client_with_repo + + response = svc_client.post( + 
'/cache/files-upload',
+        data=dict(
+            file=(io.BytesIO(datapack_zip.read_bytes()), datapack_zip.name),
+        ),
+        query_string={
+            'unpack_archive': True,
+            'override_existing': True,
+        },
+        headers={'Authorization': headers['Authorization']}
+    )
+
+    assert response
+
+    assert 200 == response.status_code
+    assert {'result'} == set(response.json.keys())
+    assert 2 == len(response.json['result']['files'])
+
+    for file_ in response.json['result']['files']:
+        assert not file_['is_archive']
+        assert not file_['unpack_archive']
+
+
+@pytest.mark.service
+def test_upload_zip_archive(datapack_zip, svc_client_with_repo):
+    """Upload zip archive."""
+    svc_client, headers, project_id = svc_client_with_repo
+
+    response = svc_client.post(
+        '/cache/files-upload',
+        data=dict(
+            file=(io.BytesIO(datapack_zip.read_bytes()), datapack_zip.name),
+        ),
+        query_string={
+            'unpack_archive': False,
+            'override_existing': True,
+        },
+        headers={'Authorization': headers['Authorization']}
+    )
+
+    assert response
+
+    assert 200 == response.status_code
+    assert {'result'} == set(response.json.keys())
+    assert 1 == len(response.json['result']['files'])
+
+    for file_ in response.json['result']['files']:
+        assert file_['is_archive']
+        assert not file_['unpack_archive']
+
+
+@pytest.mark.service
+def test_upload_tar_unpack_archive(datapack_tar, svc_client_with_repo):
+    """Upload tar archive with unpack."""
+    svc_client, headers, project_id = svc_client_with_repo
+
+    response = svc_client.post(
+        '/cache/files-upload',
+        data=dict(
+            file=(io.BytesIO(datapack_tar.read_bytes()), datapack_tar.name),
+        ),
+        query_string={
+            'unpack_archive': True,
+            'override_existing': True,
+        },
+        headers={'Authorization': headers['Authorization']}
+    )
+
+    assert response
+
+    assert 200 == response.status_code
+    assert {'result'} == set(response.json.keys())
+    assert 2 == len(response.json['result']['files'])
+
+    for file_ in response.json['result']['files']:
+        assert not file_['is_archive']
+        assert not file_['unpack_archive']
+
+
+@pytest.mark.service
+def test_upload_tar_archive(datapack_tar, svc_client_with_repo):
+    """Upload tar archive."""
+    svc_client, headers, project_id = svc_client_with_repo
+
+    response = svc_client.post(
+        '/cache/files-upload',
+        data=dict(
+            file=(io.BytesIO(datapack_tar.read_bytes()), datapack_tar.name),
+        ),
+        query_string={
+            'unpack_archive': False,
+            'override_existing': True,
+        },
+        headers={'Authorization': headers['Authorization']}
+    )
+
+    assert response
+
+    assert 200 == response.status_code
+    assert {'result'} == set(response.json.keys())
+    assert 1 == len(response.json['result']['files'])
+
+    for file_ in response.json['result']['files']:
+        assert file_['is_archive']
+        assert not file_['unpack_archive']
+
+
+@pytest.mark.service
+def test_field_upload_resp_fields(datapack_tar, svc_client_with_repo):
+    """Check response fields."""
+    svc_client, headers, project_id = svc_client_with_repo
+
+    response = svc_client.post(
+        '/cache/files-upload',
+        data=dict(
+            file=(io.BytesIO(datapack_tar.read_bytes()), datapack_tar.name),
+        ),
+        query_string={
+            'unpack_archive': True,
+            'override_existing': True,
+        },
+        headers={'Authorization': headers['Authorization']}
+    )
+
+    assert response
+
+    assert 200 == response.status_code
+
+    assert {'result'} == set(response.json.keys())
+    assert 2 == len(response.json['result']['files'])
+    assert {
+        'content_type',
+        'file_id',
+        'file_name',
+        'file_size',
+        'is_archive',
+        'timestamp',
+        'unpack_archive',
+        'relative_path',
+    } == 
set(response.json['result']['files'][0].keys()) + + assert not response.json['result']['files'][0]['is_archive'] + assert not response.json['result']['files'][0]['unpack_archive'] + + rel_path = response.json['result']['files'][0]['relative_path'] + assert rel_path.startswith(datapack_tar.name) and 'unpacked' in rel_path diff --git a/tests/service/test_dataset_views.py b/tests/service/test_dataset_views.py new file mode 100644 index 0000000000..fc0b341719 --- /dev/null +++ b/tests/service/test_dataset_views.py @@ -0,0 +1,432 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service dataset view tests.""" +import io +import json +import uuid + +import pytest + +from renku.service.config import INVALID_HEADERS_ERROR_CODE, \ + INVALID_PARAMS_ERROR_CODE, RENKU_EXCEPTION_ERROR_CODE + + +@pytest.mark.service +@pytest.mark.integration +def test_create_dataset_view(svc_client_with_repo): + """Create new dataset successfully.""" + svc_client, headers, project_id = svc_client_with_repo + + payload = { + 'project_id': project_id, + 'dataset_name': '{0}'.format(uuid.uuid4().hex), + } + + response = svc_client.post( + '/datasets/create', + data=json.dumps(payload), + headers=headers, + ) + + assert response + + assert {'result'} == set(response.json.keys()) + assert {'dataset_name'} == set(response.json['result'].keys()) + assert payload['dataset_name'] == response.json['result']['dataset_name'] + + +@pytest.mark.service +@pytest.mark.integration +def test_create_dataset_view_dataset_exists(svc_client_with_repo): + """Create new dataset which already exists.""" + svc_client, headers, project_id = svc_client_with_repo + + payload = { + 'project_id': project_id, + 'dataset_name': 'my-dataset', + } + + response = svc_client.post( + '/datasets/create', + data=json.dumps(payload), + headers=headers, + ) + + assert response + assert {'error'} == set(response.json.keys()) + + assert RENKU_EXCEPTION_ERROR_CODE == response.json['error']['code'] + assert 'Dataset exists' in response.json['error']['reason'] + + +@pytest.mark.service +@pytest.mark.integration +def test_create_dataset_view_unknown_param(svc_client_with_repo): + """Create new dataset by specifying unknown parameters.""" + svc_client, headers, project_id = svc_client_with_repo + + payload = { + 'project_id': project_id, + 'dataset_name': 'my-dataset', + 'remote_name': 'origin' + } + + response = svc_client.post( + '/datasets/create', + data=json.dumps(payload), + headers=headers, + ) + + assert response + assert {'error'} == set(response.json.keys()) + + assert INVALID_PARAMS_ERROR_CODE == response.json['error']['code'] + assert {'remote_name'} == set(response.json['error']['reason'].keys()) + + +@pytest.mark.service +@pytest.mark.integration +def test_create_dataset_with_no_identity(svc_client_with_repo): + """Create new dataset with 
no identification provided.""" + svc_client, headers, project_id = svc_client_with_repo + + payload = { + 'project_id': project_id, + 'dataset_name': 'my-dataset', + 'remote_name': 'origin', + } + + response = svc_client.post( + '/datasets/create', + data=json.dumps(payload), + headers={'Content-Type': headers['Content-Type']} + # no user identity, expect error + ) + + assert response + assert {'error'} == response.json.keys() + + assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code'] + assert 'user identification is missing' == response.json['error']['reason'] + + +@pytest.mark.service +@pytest.mark.integration +def test_add_file_view_with_no_identity(svc_client_with_repo): + """Check identity error raise in dataset add.""" + svc_client, headers, project_id = svc_client_with_repo + payload = { + 'project_id': project_id, + 'dataset_name': 'my-dataset', + 'remote_name': 'origin', + } + + response = svc_client.post( + '/datasets/add', + data=json.dumps(payload), + headers={'Content-Type': headers['Content-Type']} + # no user identity, expect error + ) + assert response + + assert {'error'} == set(response.json.keys()) + assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code'] + assert 'user identification is missing' == response.json['error']['reason'] + + +@pytest.mark.service +@pytest.mark.integration +def test_add_file_view(svc_client_with_repo): + """Check adding of uploaded file to dataset.""" + svc_client, headers, project_id = svc_client_with_repo + + response = svc_client.post( + '/cache/files-upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile1.txt'), ), + query_string={'override_existing': True}, + headers={'Authorization': headers['Authorization']} + ) + + assert response + assert 200 == response.status_code + assert {'result'} == set(response.json.keys()) + assert 1 == len(response.json['result']['files']) + file_id = response.json['result']['files'][0]['file_id'] + assert isinstance(uuid.UUID(file_id), uuid.UUID) + + payload = { + 'project_id': project_id, + 'dataset_name': 'my-dataset', + 'file_id': file_id, + } + + response = svc_client.post( + '/datasets/add', + data=json.dumps(payload), + headers=headers, + ) + + assert response + + assert {'result'} == set(response.json.keys()) + assert {'dataset_name', 'file_id', + 'project_id'} == set(response.json['result'].keys()) + assert file_id == response.json['result']['file_id'] + + +@pytest.mark.service +@pytest.mark.integration +def test_list_datasets_view(svc_client_with_repo): + """Check listing of existing datasets.""" + svc_client, headers, project_id = svc_client_with_repo + + params = { + 'project_id': project_id, + } + + response = svc_client.get( + '/datasets/list', + query_string=params, + headers=headers, + ) + + assert response + + assert {'result'} == set(response.json.keys()) + assert {'datasets'} == set(response.json['result'].keys()) + assert 0 != len(response.json['result']['datasets']) + assert {'identifier', 'name', 'version', + 'created'} == set(response.json['result']['datasets'][0].keys()) + + +@pytest.mark.service +@pytest.mark.integration +def test_list_datasets_view_no_auth(svc_client_with_repo): + """Check listing of existing datasets with no auth.""" + svc_client, headers, project_id = svc_client_with_repo + + params = { + 'project_id': project_id, + } + + response = svc_client.get( + '/datasets/list', + query_string=params, + ) + + assert response + assert {'error'} == set(response.json.keys()) + + +@pytest.mark.service +@pytest.mark.integration +def 
test_create_and_list_datasets_view(svc_client_with_repo):
+    """Create and list created dataset."""
+    svc_client, headers, project_id = svc_client_with_repo
+
+    payload = {
+        'project_id': project_id,
+        'dataset_name': '{0}'.format(uuid.uuid4().hex),
+    }
+
+    response = svc_client.post(
+        '/datasets/create',
+        data=json.dumps(payload),
+        headers=headers,
+    )
+
+    assert response
+
+    assert {'result'} == set(response.json.keys())
+    assert {'dataset_name'} == set(response.json['result'].keys())
+    assert payload['dataset_name'] == response.json['result']['dataset_name']
+
+    params_list = {
+        'project_id': project_id,
+    }
+
+    response = svc_client.get(
+        '/datasets/list',
+        query_string=params_list,
+        headers=headers,
+    )
+
+    assert response
+
+    assert {'result'} == set(response.json.keys())
+    assert {'datasets'} == set(response.json['result'].keys())
+    assert 0 != len(response.json['result']['datasets'])
+    assert {'identifier', 'name', 'version',
+            'created'} == set(response.json['result']['datasets'][0].keys())
+
+    assert payload['dataset_name'] in [
+        ds['name'] for ds in response.json['result']['datasets']
+    ]
+
+
+@pytest.mark.service
+@pytest.mark.integration
+def test_list_dataset_files(svc_client_with_repo):
+    """Check listing of dataset files."""
+    svc_client, headers, project_id = svc_client_with_repo
+
+    file_name = '{0}'.format(uuid.uuid4().hex)
+    response = svc_client.post(
+        '/cache/files-upload',
+        data=dict(file=(io.BytesIO(b'this is a test'), file_name), ),
+        query_string={'override_existing': True},
+        headers={'Authorization': headers['Authorization']}
+    )
+
+    assert response
+    assert 200 == response.status_code
+
+    assert {'result'} == set(response.json.keys())
+    assert 1 == len(response.json['result']['files'])
+    file_id = response.json['result']['files'][0]['file_id']
+    assert isinstance(uuid.UUID(file_id), uuid.UUID)
+
+    payload = {
+        'project_id': project_id,
+        'dataset_name': 'my-dataset',
+        'file_id': file_id,
+    }
+
+    response = svc_client.post(
+        '/datasets/add',
+        data=json.dumps(payload),
+        headers=headers,
+    )
+
+    assert response
+
+    assert {'result'} == set(response.json.keys())
+    assert {'dataset_name', 'file_id',
+            'project_id'} == set(response.json['result'].keys())
+    assert file_id == response.json['result']['file_id']
+
+    params = {
+        'project_id': project_id,
+        'dataset_name': 'my-dataset',
+    }
+
+    response = svc_client.get(
+        '/datasets/files-list',
+        query_string=params,
+        headers=headers,
+    )
+
+    assert response
+
+    assert {'result'} == set(response.json.keys())
+    assert {'dataset_name', 'files'} == set(response.json['result'].keys())
+
+    assert params['dataset_name'] == response.json['result']['dataset_name']
+    assert file_name in [
+        file['name'] for file in response.json['result']['files']
+    ]
+
+
+@pytest.mark.service
+@pytest.mark.integration
+def test_add_with_unpacked_archive(datapack_zip, svc_client_with_repo):
+    """Upload archive and add it to a dataset."""
+    svc_client, headers, project_id = svc_client_with_repo
+
+    response = svc_client.post(
+        '/cache/files-upload',
+        data=dict(
+            file=(io.BytesIO(datapack_zip.read_bytes()), datapack_zip.name),
+        ),
+        query_string={
+            'unpack_archive': True,
+            'override_existing': True,
+        },
+        headers={'Authorization': headers['Authorization']}
+    )
+
+    assert response
+
+    assert 200 == response.status_code
+    assert {'result'} == set(response.json.keys())
+    assert 2 == len(response.json['result']['files'])
+
+    for file_ in response.json['result']['files']:
+        assert not file_['is_archive']
+        assert not 
file_['unpack_archive'] + + file_id = file_['file_id'] + assert file_id + + file_ = response.json['result']['files'][0] + payload = { + 'project_id': project_id, + 'dataset_name': '{0}'.format(uuid.uuid4().hex), + } + + response = svc_client.post( + '/datasets/create', + data=json.dumps(payload), + headers=headers, + ) + + assert response + + assert {'result'} == set(response.json.keys()) + assert {'dataset_name'} == set(response.json['result'].keys()) + assert payload['dataset_name'] == response.json['result']['dataset_name'] + + payload = { + 'project_id': project_id, + 'dataset_name': 'my-dataset', + 'file_id': file_['file_id'], + } + + response = svc_client.post( + '/datasets/add', + data=json.dumps(payload), + headers=headers, + ) + + assert response + + assert {'result'} == set(response.json.keys()) + assert {'dataset_name', 'file_id', + 'project_id'} == set(response.json['result'].keys()) + assert file_['file_id'] == response.json['result']['file_id'] + + params = { + 'project_id': project_id, + 'dataset_name': 'my-dataset', + } + + response = svc_client.get( + '/datasets/files-list', + query_string=params, + headers=headers, + ) + + assert response + + assert {'result'} == set(response.json.keys()) + assert {'dataset_name', 'files'} == set(response.json['result'].keys()) + + assert params['dataset_name'] == response.json['result']['dataset_name'] + assert file_['file_name'] in [ + file['name'] for file in response.json['result']['files'] + ] diff --git a/tests/service/test_exceptions.py b/tests/service/test_exceptions.py new file mode 100644 index 0000000000..4e165bdca2 --- /dev/null +++ b/tests/service/test_exceptions.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
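+# ``service_allowed_endpoint`` is presumed to be a parametrized fixture
+# yielding, per endpoint, a mapping of HTTP methods to test-client calls
+# together with the request metadata (allowed method, url, headers) used
+# below.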
+"""Renku service exception tests for all endpoints.""" +import pytest + +from renku.service.config import INVALID_HEADERS_ERROR_CODE + + +@pytest.mark.service +def test_allowed_methods_exc(service_allowed_endpoint): + """Check allowed methods for every endpoint.""" + methods, request, svc_client = service_allowed_endpoint + + method = request['allowed_method'] + if method == 'GET': # if GET remove sister method HEAD + methods.pop(method) + methods.pop('HEAD') + else: + methods.pop(method) + + for method, fn in methods.items(): + response = fn(request['url']) + assert 405 == response.status_code + + +@pytest.mark.service +def test_auth_headers_exc(service_allowed_endpoint): + """Check correct headers for every endpoint.""" + methods, request, svc_client = service_allowed_endpoint + + method = request['allowed_method'] + if method == 'GET': # if GET remove sister method HEAD + client_method = methods.pop(method) + methods.pop('HEAD') + else: + client_method = methods.pop(method) + + response = client_method( + request['url'], + headers=request['headers'], + ) + + assert 200 == response.status_code + assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code'] + assert 'user identification is missing' == response.json['error']['reason']