ScrapExplorer - main.py

Home / tools / utils / DLx / src Lines: 1 | Size: 2211 bytes [Download] [Show on GitHub] [Search similar files] [Raw] [Raw (proxy)]
[FILE BEGIN]
# SPDX-License-Identifier: GPL-3.0
# DLX
#
# Bulk download tool
#
# COPYRIGHT NOTICE
# Copyright (C) 2025 0x4248 and contributors
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the license is not changed.
#
# This software is free and open source. Licensed under the GNU general
# public license version 3.0 as published by the Free Software Foundation.

import os
import sys
import importlib

import argparse


def download_files_from_tsv(tsv_path, output_dir, client):
    """Download every file listed in a TSV manifest below ``output_dir``.

    Each non-blank line of the manifest must hold exactly two tab-separated
    fields: the URL to download, and the destination path relative to
    ``output_dir``. Blank lines are skipped. Missing destination
    directories are created before each download.

    Args:
        tsv_path: Path of the TSV manifest to read.
        output_dir: Directory the relative paths are joined onto; created
            if it does not exist yet.
        client: Object exposing ``download(url, local_path)``; it is called
            once per manifest entry, in file order.

    Raises:
        ValueError: If a non-blank line does not contain exactly two
            tab-separated fields.
    """
    # exist_ok avoids the check-then-create race; os.makedirs('') raises,
    # so an empty output_dir (meaning the current directory) is skipped.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(tsv_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            record = line.strip()
            if not record:
                # Tolerate blank lines, e.g. a trailing newline at EOF.
                continue

            fields = record.split('\t')
            if len(fields) != 2:
                raise ValueError(
                    f"{tsv_path}:{line_number}: expected 2 tab-separated "
                    f"fields (url, relative path), got {len(fields)}"
                )
            full_url, relative_path = fields

            # NOTE(review): relative_path is used as-is. An absolute path or
            # one containing '..' makes os.path.join resolve outside
            # output_dir -- only feed this function trusted manifests.
            local_path = os.path.join(output_dir, relative_path)

            parent_dir = os.path.dirname(local_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

            print(f"Downloading {full_url} to {local_path}")
            client.download(full_url, local_path)


def _build_parser():
    """Return the argument parser with the 'fetch' and 'download' commands."""
    parser = argparse.ArgumentParser(description='Fetch or download files from a TSV file')
    subparsers = parser.add_subparsers(dest='command', required=True)

    fetch_parser = subparsers.add_parser('fetch', help='Fetch files from a URL')
    fetch_parser.add_argument('url', help='The URL to fetch files from')
    fetch_parser.add_argument('driver', help='The driver to use for fetching files')

    download_parser = subparsers.add_parser('download', help='Download files from a TSV file')
    download_parser.add_argument('tsv_path', help='The path to the TSV file')
    download_parser.add_argument('output_dir', help='The directory to save the downloaded files')
    download_parser.add_argument('client_backend', help='The client backend to use for downloading files')

    return parser


def main(argv=None):
    """Run the command-line interface and return the process exit status.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        0 once the selected command has completed.
    """
    args = _build_parser().parse_args(argv)

    if args.command == 'fetch':
        # The driver module is resolved by name from the 'drivers' package.
        # NOTE(review): fetch() is called on the Driver class itself, while
        # the download path below instantiates Client -- confirm that
        # Driver.fetch is meant to be callable without an instance.
        driver = getattr(importlib.import_module(f'drivers.{args.driver}'), 'Driver')
        driver.fetch(args.url, 'output.tsv')
        return 0

    if args.command == 'download':
        # The client backend is resolved by name from the 'clients' package.
        client = getattr(importlib.import_module(f'clients.{args.client_backend}'), 'Client')()
        download_files_from_tsv(args.tsv_path, args.output_dir, client)
        return 0

    return 0


if __name__ == '__main__':
    # sys.exit instead of the site-provided exit(), which is not guaranteed
    # to exist (e.g. when Python is started with -S).
    sys.exit(main())
[FILE END]
(C) 2025 0x4248. (C) 2025 4248 Media and 4248 Systems, all part of 0x4248. See LICENCE files for more information. Not all files are by 0x4248; always check licensing.