Files
TUDown/tudown.py
2017-02-03 14:32:15 +01:00

145 lines
4.9 KiB
Python
Executable File

#!/usr/bin/python3
from calendar import timegm
from os import makedirs
from os.path import exists, getmtime
from re import compile, findall
from threading import Thread, active_count
from time import strptime, sleep
from urllib.parse import unquote, urljoin

from lxml import html
from requests import Session, utils
# Upper bound on the number of download threads main() keeps running at once.
NUM_THREADS = 5 # max number of threads
def create_filepath(filepath):
    """Make sure the directory ``filepath`` exists, creating parents as needed.

    Args:
        filepath: path of the directory to create.

    Raises:
        FileExistsError: if ``filepath`` exists but is not a directory.
    """
    # exist_ok=True closes the check-then-create race: several download
    # threads may ask for the same target directory at the same moment.
    makedirs(filepath, exist_ok=True)
# Status codes whose ``Location`` header names the URL to fetch instead.
_REDIRECT_CODES = (301, 302, 303, 307, 308)


def _save_response(response, directory, filename):
    """Write the body of ``response`` to ``filename`` inside ``directory``."""
    create_filepath(directory)
    with open(filename, 'wb') as fd:
        for chunk in response.iter_content(1024):
            fd.write(chunk)


def download_files(session, f):
    """Download one file, or refresh the local copy if the server's is newer.

    A file that does not exist locally is fetched and reported as
    ``[+] <filename>``. For an existing file the ``Last-Modified`` header of
    a HEAD request is compared with the local modification time, and a newer
    remote file is fetched again and reported as ``[M] <filename>``.
    Redirects are followed manually so that the local file name is derived
    from the final URL.

    Args:
        session: requests session used for every HTTP call.
        f: sequence whose first item is the file URL and whose second item
            is the local target directory.
    """
    url, directory = f[0], f[1]
    decoded_url = utils.unquote(url)
    # The slice keeps the leading '/', so it doubles as the path separator.
    filename = directory + decoded_url[decoded_url.rindex('/'):]

    if not exists(filename):
        response = session.get(url, allow_redirects=False)
        if response.status_code in _REDIRECT_CODES:
            # Location may be relative; resolve it against the current URL.
            target = urljoin(url, response.headers['Location'])
            download_files(session, (target, directory))
        elif response.status_code == 200:
            _save_response(response, directory, filename)
            print('[+] ' + filename)
        return

    response = session.head(url, allow_redirects=False)
    if response.status_code in _REDIRECT_CODES:
        target = urljoin(url, response.headers['Location'])
        download_files(session, (target, directory))
        return
    if response.status_code != 200:
        return

    last_mod_file = getmtime(filename)
    try:
        last_mod_www = timegm(strptime(response.headers['Last-Modified'],
                                       '%a, %d %b %Y %H:%M:%S %Z'))
    except (KeyError, ValueError):
        # Header missing or not in the expected date format: treat the
        # local copy as current instead of letting the thread die.
        print("Can't check {} for updates.".format(url))
        last_mod_www = last_mod_file

    if last_mod_www > last_mod_file:
        response = session.get(url)
        if response.status_code == 200:
            _save_response(response, directory, filename)
            print('[M] ' + filename)
def resolve_direct_links(session, hrefs):
    """Return the ``Location`` targets announced for the given links.

    A HEAD request is issued for every entry of ``hrefs`` in order. Entries
    whose response carries no ``Location`` header are left out of the result.

    Args:
        session: object whose ``head(url)`` returns a response with headers.
        hrefs: iterable of URLs to probe.

    Returns:
        List of ``Location`` header values, in request order.
    """
    # Lazy generator: each request is sent right before its headers are read.
    all_headers = (session.head(link).headers for link in hrefs)
    return [headers['Location'] for headers in all_headers
            if 'Location' in headers]
def resolve_direct_link(session, href):
    """Return the redirect target of ``href``, or ``href`` if there is none.

    Args:
        session: requests session used for the HEAD request.
        href: URL to resolve.

    Returns:
        The ``Location`` value reported for ``href``; ``href`` itself when
        the server answers without a ``Location`` header.
    """
    resolved = resolve_direct_links(session, [href])
    # Without a redirect there is nothing to resolve; hand the input back
    # rather than failing with IndexError on the empty list.
    return resolved[0] if resolved else href
def get_links_from_folder(session, urls):
    """Collect the Moodle folder file links found on the given pages.

    Every page in ``urls`` is fetched and its text is scanned for
    ``pluginfile.php/<id>/mod_folder/content/0/...`` URLs on
    ``www.moodle.tum.de``.

    Args:
        session: object whose ``get(url)`` returns a response with ``text``.
        urls: iterable of page URLs to scan.

    Returns:
        List of matching file URLs, in page order and then order of
        appearance. Query strings are not part of the match.
    """
    # Raw string: same pattern as before, but without relying on invalid
    # string escapes. Compiled once instead of once per page.
    pattern = compile(
        r'https\:\/\/www\.moodle\.tum\.de\/pluginfile\.php\/\d+\/mod_folder\/content\/0\/(?:[\w\d\_\-]*\/)*[\w\d\_\-\.]+')
    hrefs = []
    for url in urls:
        response = session.get(url)
        hrefs.extend(pattern.findall(response.text))
    return hrefs
def get_link_from_folder(session, url):
    """Return the first file link of the Moodle folder page at ``url``.

    Args:
        session: requests session used to fetch the folder page.
        url: address of the folder page.

    Returns:
        The first matching file URL; ``url`` itself when the page contains
        no matching link.
    """
    found = get_links_from_folder(session, [url])
    # An empty folder used to raise IndexError; fall back to the input.
    return found[0] if found else url
def get_file_links(session, url, files):
    """Collect the download links on a page that match the given patterns.

    All ``<a href>`` values of the page are gathered. Moodle resource links
    are replaced by their redirect target and Moodle folder links by every
    file link found in the folder. A link that cannot be resolved is kept
    unchanged. ``?forcedownload=1`` is stripped from every link.

    Args:
        session: requests session used to fetch the page and resolve links.
        url: address of the page to scan.
        files: iterable of sequences whose first item is a regular
            expression, tested against the unquoted link, and whose second
            item is the local target directory.

    Returns:
        List of ``(absolute_url, directory)`` tuples, one for every
        pattern/link combination that matches.
    """
    response = session.get(url)
    hrefs = []
    for href in html.fromstring(response.text).xpath('//a/@href'):
        if 'moodle.tum.de/mod/resource/view.php' in href:
            hrefs.extend(resolve_direct_links(session, [href]) or [href])
        elif 'moodle.tum.de/mod/folder/view.php' in href:
            # Keep every file of the folder, not just the first one.
            hrefs.extend(get_links_from_folder(session, [href]) or [href])
        else:
            hrefs.append(href)
    hrefs = [href.replace('?forcedownload=1', '') for href in hrefs]
    # ---------------
    # Unquote once per link instead of once per link and pattern.
    candidates = [(href, unquote(href)) for href in hrefs]
    links = []
    for f in files:
        reg = compile(f[0])
        for href, decoded in candidates:
            if reg.search(decoded):
                # urljoin leaves absolute links untouched and resolves
                # relative ones against the page URL (this also covers the
                # former stripping of 'index.html.en' / 'index.html.de').
                links.append((urljoin(url, href), f[1]))
    return links
def establish_moodle_session(user, passwd):
    """Open a session and run the Shibboleth login sequence for TUM Moodle.

    The requests are sent in a fixed order: the Moodle start page, the
    Shibboleth login endpoint, the identity provider's login form with the
    credentials, and finally the SAML POST endpoint with the ``RelayState``
    and ``SAMLResponse`` fields taken from the first form of the identity
    provider's answer.

    Args:
        user: login name sent as ``j_username``.
        passwd: password sent as ``j_password``.

    Returns:
        The session all of the above requests were made with.
    """
    moodle = Session()
    moodle.get('https://www.moodle.tum.de')
    moodle.get('https://www.moodle.tum.de/Shibboleth.sso/Login?providerId=https://tumidp.lrz.de/idp/shibboleth&target=https://www.moodle.tum.de/auth/shibboleth/index.php')

    credentials = {'j_username': user, 'j_password': passwd, '_eventId_proceed': ''}
    idp_answer = moodle.post(
        'https://tumidp.lrz.de/idp/profile/SAML2/Redirect/SSO?execution=e1s1',
        data=credentials)

    # Forward the two hidden form fields of the answer to Moodle.
    saml_fields = html.fromstring(idp_answer.text).forms[0].fields
    assertion = {'RelayState': saml_fields['RelayState'],
                 'SAMLResponse': saml_fields['SAMLResponse']}
    moodle.post('https://www.moodle.tum.de/Shibboleth.sso/SAML2/POST',
                data=assertion)
    return moodle
def main(url, files, user='', passwd=''):
    """Download every file linked on ``url`` that matches ``files``.

    Blocks until all downloads have finished.

    Args:
        url: page to scan for links. A URL containing ``www.moodle.tum.de``
            triggers the Moodle login; anything else uses HTTP auth.
        files: pattern/directory pairs passed on to ``get_file_links``.
        user: login name.
        passwd: password.
    """
    # create session
    if 'www.moodle.tum.de' in url:
        session = establish_moodle_session(user, passwd)
    else:
        session = Session()
        session.auth = (user, passwd)
        # NOTE(review): assumed to belong to the non-Moodle branch only;
        # confirm against the original layout.
        session.headers = {
            "Accept-Language": "en-US,en;"
        }
    # get file links
    links = get_file_links(session, url, files)
    # download files
    workers = []
    for link in links:
        # Throttle: wait until a slot is free before starting another thread.
        while active_count() > NUM_THREADS:
            sleep(0.1)
        # Thread.start() returns None, so keep the Thread object itself;
        # otherwise there is nothing to join afterwards.
        thread = Thread(target=download_files, args=(session, link))
        thread.start()
        workers.append(thread)
    for thread in workers:
        thread.join()