Skip to content

Commit

Permalink
feat: binary captcha image renderer
Browse files Browse the repository at this point in the history
  • Loading branch information
xyb committed Aug 14, 2023
1 parent a605e21 commit bb97d5a
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 49 deletions.
40 changes: 26 additions & 14 deletions task/baidupcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pathlib import PurePosixPath
from time import sleep
from typing import Callable
from typing import Dict
from typing import List

from baidupcs_py.baidupcs import BaiduPCSApi
Expand Down Expand Up @@ -37,12 +38,17 @@ def get_baidupcs_client() -> "BaiduPCSClient":


class BaiduPCSClient:
def __init__(self, bduss, cookies, api=None):
def __init__(self, bduss: str, cookies: Dict[str, str], api: "BaiduPCSApi" = None):
self.bduss = bduss
self.cookies = cookies
self.api = api if api else BaiduPCSApi(bduss=bduss, cookies=cookies)

def list_files(self, remote_dir, retry=3, fail_silent=False):
def list_files(
self,
remote_dir: str,
retry: int = 3,
fail_silent: bool = False,
) -> List[dict]:
while True:
try:
files = self.api.list(remote_dir, recursive=True)
Expand Down Expand Up @@ -74,13 +80,13 @@ def list_files(self, remote_dir, retry=3, fail_silent=False):

def save_shared_link(
self,
remote_dir,
link,
password=None,
callback_save_captcha=None,
remote_dir: str,
link: str,
password: str = None,
callback_save_captcha: Callable[[str, str, bytes], None] = None,
captcha_id: str = "",
captcha_code: str = "",
):
) -> None:
save_shared(
self,
link,
Expand All @@ -91,7 +97,7 @@ def save_shared_link(
captcha_code=captcha_code,
)

def download_dir(self, remote_dir, local_dir, sample_size=0):
def download_dir(self, remote_dir: str, local_dir: str, sample_size: int = 0):
for file in self.list_files(remote_dir):
if not file["is_file"]:
continue
Expand All @@ -101,7 +107,13 @@ def download_dir(self, remote_dir, local_dir, sample_size=0):
file_size = file["size"]
self.download_file(remote_path, local_dir_, file_size, sample_size)

def download_file(self, remote_path, local_dir, file_size, sample_size=0):
def download_file(
self,
remote_path: str,
local_dir: str,
file_size: str,
sample_size: int = 0,
) -> int:
local_path = Path(local_dir) / basename(remote_path)
logger.info(f" {remote_path} -> {local_path}")

Expand All @@ -113,12 +125,12 @@ def download_file(self, remote_path, local_dir, file_size, sample_size=0):
not sample_size and file_size == getsize(local_path)
):
logger.info(f"{local_path} is ready existed.")
return
return 0

url = self.api.download_link(remote_path)
if not url:
logger.info(remote_path)
return
return 0

headers = {
"Cookie": f"BDUSS={self.cookies['BDUSS']};",
Expand All @@ -129,21 +141,21 @@ def download_file(self, remote_path, local_dir, file_size, sample_size=0):
total = download_url(local_path, url, headers, limit=sample_size)
return total

def leech(
    self,
    remote_dir: str,
    local_dir: "str | Path",
    sample_size: int = 0,
) -> None:
    """Download everything under ``remote_dir`` into ``local_dir``.

    ``local_dir`` is created (including missing parents) when it does not
    exist yet; the actual transfer is delegated to ``download_dir``.

    Args:
        remote_dir: Remote directory, passed through to ``download_dir``.
        local_dir: Local target directory, given as a string or a ``Path``.
        sample_size: Passed through to ``download_dir`` unchanged.
    """
    # Wrap in Path so a plain string works as well: ``str`` has no
    # ``.exists()`` method, which made the str-annotated call path fail.
    if not Path(local_dir).exists():
        makedirs(local_dir, exist_ok=True)

    self.download_dir(remote_dir, local_dir, sample_size=sample_size)

def exists(self, remote_dir: str) -> bool:
    """Tell whether ``remote_dir`` exists on the remote side.

    A single trailing ``/`` is dropped first. If nothing is left the path
    was the root (or empty) and ``True`` is returned without any lookup;
    otherwise the path is split into parent and last component and the
    answer comes from ``remotepath_exists``.
    """
    path = remote_dir[:-1] if remote_dir.endswith("/") else remote_dir
    if path == "":  # should be "/"
        return True
    parent, leaf = split(path)
    return remotepath_exists(self.api, leaf, parent)

def delete(self, remote_dir: str) -> None:
    """Remove ``remote_dir`` by calling ``remove`` on ``self.api``."""
    api = self.api
    api.remove(remote_dir)


Expand Down
14 changes: 7 additions & 7 deletions task/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def set_files(self, files) -> None:
def load_files(self) -> List[dict]:
    """Decode ``self.files`` with ``loads`` and return the result.

    An unset/empty ``files`` value is treated as ``"[]"``, and a falsy
    decoded value is replaced by an empty list.
    """
    raw = self.files
    if not raw:
        raw = "[]"
    decoded = loads(raw)
    return decoded or []

def list_remote_files(self, files_only: bool = True) -> [dict]:
def list_remote_files(self, files_only: bool = True) -> List[dict]:
if not self.files:
return []
files = loads(self.files)
Expand All @@ -149,7 +149,7 @@ def list_remote_files(self, files_only: bool = True) -> [dict]:
def remote_files(self) -> List[dict]:
return self.list_remote_files(files_only=True)

def list_local_files(self) -> [dict]:
def list_local_files(self) -> List[dict]:
data_path = self.data_path
for root, dirs, files in walk(data_path):
for file in files:
Expand Down Expand Up @@ -193,24 +193,24 @@ def largest_file_size(self) -> int:
return size

@classmethod
def filter_ready_to_transfer(cls) -> List["Task"]:
    """Select the tasks whose status is ``Status.INITED``.

    Returns whatever ``cls.objects.filter`` produces.
    NOTE(review): annotated as a list; presumably this is a lazy queryset
    rather than a real ``list`` - confirm before relying on list methods.
    """
    return cls.objects.filter(Q(status=cls.Status.INITED))

@classmethod
def filter_transferd(cls) -> List["Task"]:
    """Select the tasks whose status is ``Status.TRANSFERRED``.

    Returns whatever ``cls.objects.filter`` produces.
    """
    wanted_status = cls.Status.TRANSFERRED
    return cls.objects.filter(status=wanted_status)

@classmethod
def filter_sampling_downloaded(cls) -> List["Task"]:
    """Select sampled tasks that are flagged for a full download.

    Matches tasks with status ``Status.SAMPLING_DOWNLOADED`` and
    ``full_download_now`` set to ``True``; returns whatever
    ``cls.objects.filter`` produces.
    """
    criteria = {
        "status": cls.Status.SAMPLING_DOWNLOADED,
        "full_download_now": True,
    }
    return cls.objects.filter(**criteria)

@classmethod
def filter_failed(cls) -> List["Task"]:
    """Select the tasks whose ``failed`` flag is ``True``.

    Returns whatever ``cls.objects.filter`` produces.
    """
    failed_tasks = cls.objects.filter(failed=True)
    return failed_tasks

@property
Expand All @@ -231,7 +231,7 @@ def restart_downloading(self) -> None:
self.inc_retry_times()
self.save()

def get_steps(self) -> tuple:
def get_steps(self) -> tuple[str, str]:
found_current = False
status = self.Status
if not self.failed:
Expand Down
62 changes: 34 additions & 28 deletions task/views.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import logging
from io import BytesIO
from typing import Any

from baidupcs_py.baidupcs import BaiduPCSError
from django.http import HttpRequest
from django.http import HttpResponse
from django_filters import rest_framework as filters
from drf_spectacular.utils import extend_schema
from rest_framework import mixins
from rest_framework import status
from rest_framework import viewsets
from rest_framework.decorators import action
from rest_framework.decorators import renderer_classes
from rest_framework.renderers import BaseRenderer
from rest_framework.response import Response
from rest_framework.serializers import Serializer

from .baidupcs import get_baidupcs_client
from .leecher import transfer
Expand All @@ -21,11 +27,20 @@
logger = logging.getLogger(__name__)


class JPEGRenderer(BaseRenderer):
    """Renderer that declares ``image/jpeg`` and emits its input untouched."""

    # Content type and format suffix this renderer advertises.
    media_type = "image/jpeg"
    format = "jpg"
    # No charset: the payload is raw bytes, not text.
    charset = None
    render_style = "binary"

    def render(self, data, accepted_media_type=None, renderer_context=None):
        """Return ``data`` as-is; the other arguments are ignored."""
        return data


def delete_remote_files(
task_id,
remote_path,
catch_error=True,
):
remote_path: str,
catch_error: bool = True,
) -> HttpResponse:
try:
client = get_baidupcs_client()
if client.exists(remote_path):
Expand All @@ -50,36 +65,32 @@ class TaskViewSet(
filter_backends = (filters.DjangoFilterBackend,)
filterset_fields = ("shared_link", "shared_id", "status", "failed")

def list(self, request):
return super().list(request)

# GET returns the task's stored file list (``task.load_files()``);
# DELETE hands the task's remote path to ``delete_remote_files``.
@action(methods=["get", "delete"], detail=True, name="Remote Files")
def files(self, request: HttpRequest, pk: int = None) -> HttpResponse:
    task = self.get_object()
    method = request.method
    if method == "DELETE":
        return delete_remote_files(task.remote_path)
    if method == "GET":
        return Response(task.load_files())

# GET returns ``task.list_local_files()``; DELETE calls
# ``task.delete_files()`` and answers 204 with no body.
@action(methods=["get", "delete"], detail=True, name="Local Files")
def local_files(self, request: HttpRequest, pk: int = None) -> HttpResponse:
    task = self.get_object()
    method = request.method
    if method == "GET":
        local_listing = task.list_local_files()
        return Response(local_listing)
    if method == "DELETE":
        task.delete_files()
        return Response(status=status.HTTP_204_NO_CONTENT)

# Serves the task's stored captcha bytes as a JPEG response.
# NOTE(review): ``renderer_classes`` is documented for function-based
# ``@api_view`` views; confirm it has the intended effect on a ViewSet
# action (the alternative is passing ``renderer_classes=`` to ``@action``).
@extend_schema(responses={200: bytes, 404: None})
@action(detail=True, name="Captch Image")
@renderer_classes([JPEGRenderer])
def captcha(self, request: HttpRequest, pk: int = None) -> HttpResponse:
    task = self.get_object()
    image_stream = BytesIO(task.captcha)
    # Plain Django response: the body is sent as-is with the JPEG type.
    return HttpResponse(image_stream, content_type=JPEGRenderer.media_type)

@action(methods=["post"], detail=True, name="Input Captcha Code")
def captcha_code(self, request, pk=None):
def captcha_code(self, request: HttpRequest, pk: int = None) -> HttpResponse:
serializer = CaptchaCodeSerializer(data=request.data)
serializer.is_valid(raise_exception=True)

Expand All @@ -103,7 +114,7 @@ def captcha_code(self, request, pk=None):
return Response(TaskSerializer(task).data)

@action(methods=["post"], detail=True, name="Approve to download whole files")
def full_download_now(self, request, pk=None):
def full_download_now(self, request: HttpRequest, pk: int = None) -> HttpResponse:
serializer = FullDownloadNowSerializer(data=request.data)
serializer.is_valid(raise_exception=True)
task = self.get_object()
Expand All @@ -112,38 +123,33 @@ def full_download_now(self, request, pk=None):
return Response(TaskSerializer(task).data)

# Calls ``task.restart_downloading()`` and reports the resulting status.
@action(methods=["post"], detail=True, name="Restart task to downloading files")
def restart_downloading(self, request: HttpRequest, pk: int = None) -> HttpResponse:
    task = self.get_object()
    task.restart_downloading()
    # Read the status only after the call above has run.
    payload = {"status": task.status}
    return Response(payload)

# Calls ``task.restart()`` and reports the resulting status.
@action(methods=["post"], detail=True, name="Restart task from inited status")
def restart(self, request: HttpRequest, pk: int = None) -> HttpResponse:
    task = self.get_object()
    task.restart()
    # Read the status only after the call above has run.
    payload = {"status": task.status}
    return Response(payload)

# Calls ``task.schedule_resume()`` and reports the resulting status.
@action(methods=["post"], detail=True, name="Resume failed task")
def resume(self, request: HttpRequest, pk: int = None) -> HttpResponse:
    task = self.get_object()
    task.schedule_resume()
    # Read the status only after the call above has run.
    payload = {"status": task.status}
    return Response(payload)

# Erases the task, then removes its remote path. Errors from the remote
# side are not swallowed by the helper (``catch_error=False``); a
# ``BaiduPCSError`` is turned into a 400 response carrying its message.
@action(methods=["delete"], detail=True, name="Erase task, remote and local files")
def erase(self, request: HttpRequest, pk: int = None) -> HttpResponse:
    task = self.get_object()
    task.erase()
    try:
        response = delete_remote_files(task.remote_path, catch_error=False)
    except BaiduPCSError as exc:
        response = Response({"error": str(exc)}, status=status.HTTP_400_BAD_REQUEST)
    return response

def get_serializer(self, *args, **kwargs):
def get_serializer(self, *args: Any, **kwargs: Any) -> Serializer:
if self.action == "captcha_code":
return CaptchaCodeSerializer(*args, **kwargs)
if self.action == "full_download_now":
Expand Down

0 comments on commit bb97d5a

Please sign in to comment.