From 0fb433d325af383a35f8c79e132bba623379562b Mon Sep 17 00:00:00 2001
From: Alexander Weidinger
Date: Tue, 14 Apr 2015 14:54:14 +0200
Subject: [PATCH] new (and better) version.

---
Review note: the added files tudown.py and update.py.example were revised
after the original commit, so the blob ids on their "index" lines still name
the original blobs.

 everything.py     |  85 --------------------------
 tudown.py         | 149 ++++++++++++++++++++++++++++++++++++++++++++++
 update.py.example |  32 ++++++++++
 3 files changed, 181 insertions(+), 85 deletions(-)
 delete mode 100644 everything.py
 create mode 100644 tudown.py
 create mode 100755 update.py.example

diff --git a/everything.py b/everything.py
deleted file mode 100644
index a961fe4..0000000
--- a/everything.py
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/usr/bin/python3
-from requests import Session
-from lxml import html
-from re import compile, findall, match
-from os import makedirs
-from os.path import exists, getmtime
-from calendar import timegm
-from time import strptime
-
-AUTH_NONE = 0
-AUTH_MOODLE = 1
-
-_regex = compile('https\:\/\/www\.moodle\.tum\.de\/mod\/resource\/view\.php\?id\=\d{6}')
-_is_file = compile('\.(pdf|txt|py|c|jar)')
-
-def create_filepath(filepath):
-    if not exists(filepath):
-        makedirs(filepath)
-
-def download_file(session, url, filepath):
-    filename = filepath + url[url.rindex('/'):] #ugly as fuck
-
-    if not exists(filename):
-        print('[+] ' + filename)
-        req = session.get(url)
-        with open(filename, 'wb') as fh:
-            for chunk in req.iter_content():
-                fh.write(chunk)
-    else:
-        last_mod_file = getmtime(filename)
-        last_mod_www = timegm(strptime(session.head(url).headers['Last-Modified'], '%a, %d %b %Y %H:%M:%S %Z'))
-
-        if last_mod_www > last_mod_file:
-            print('[M] ' + filename)
-            req = session.get(url)
-            with open(filename, 'wb') as fh:
-                for chunk in req.iter_content():
-                    fh.write(chunk)
-
-def resolve_direct_link(session, url):
-    return session.head(url).headers['Location']
-
-def get_file_links(session, url, base=''):
-    links = []
-    handle = session.get(url).text
-
-    if url.startswith('https://www.moodle.tum.de/course/'):
-        for match in findall(_regex, handle):
-            links.append(resolve_direct_link(session, match))
-    else:
-        hrefs = html.fromstring(handle).xpath('//a/@href')
-        for href in hrefs:
-            if _is_file.findall(href) != []: # is file link?
-                links.append(base + href)
-
-    return links
-
-def get_moodle_session(user, passwd):
-    session = Session()
-
-    session.get('https://www.moodle.tum.de/Shibboleth.sso/Login?providerId=https://tumidp.lrz.de/idp/shibboleth&target=https://www.moodle.tum.de/auth/shibboleth/index.php')
-    resp = session.post('https://tumidp.lrz.de/idp/Authn/UserPassword', data={'j_username':user, 'j_password':passwd})
-
-    parsed = html.fromstring(resp.text)
-
-    session.post('https://www.moodle.tum.de/Shibboleth.sso/SAML2/POST', data={'RelayState':parsed.forms[0].fields['RelayState'], 'SAMLResponse':parsed.forms[0].fields['SAMLResponse']})
-
-    return session
-
-def main(mode, url, files, user='', passwd='', base=''):
-    session = None
-    if mode == AUTH_MOODLE:
-        session = get_moodle_session(user, passwd)
-    else:
-        session = Session()
-
-    links = get_file_links(session, url, base)
-
-    for link in links:
-        for ft in files:
-            reg = compile(ft[0])
-            match = reg.findall(link)
-            if match != []:
-                create_filepath(ft[1])
-                download_file(session, link, ft[1])
diff --git a/tudown.py b/tudown.py
new file mode 100644
index 0000000..e38b63a
--- /dev/null
+++ b/tudown.py
@@ -0,0 +1,149 @@
+#!/usr/bin/python3
+"""Download course material from lecture web pages and TUM Moodle courses."""
+from requests import Session, utils
+from lxml import html
+from re import compile, findall, match
+from os import makedirs
+from os.path import exists, getmtime, join
+from calendar import timegm
+from time import strptime
+from email.utils import mktime_tz, parsedate_tz
+from urllib.parse import urljoin, urlsplit
+
+MOODLE_HOST = 'www.moodle.tum.de'
+# Indirect Moodle resource links; a HEAD request reveals the real file URL.
+MOODLE_RESOURCE = compile(r'https\:\/\/www\.moodle\.tum\.de\/mod\/resource\/view\.php\?id\=\d{6}')
+
+def create_filepath(filepath):
+    """Create the directory *filepath*, including parents, if it is missing."""
+    # An empty path means the current directory, which always exists.
+    if filepath:
+        makedirs(filepath, exist_ok=True)
+
+def _local_name(url, directory):
+    """Return the local path for *url* inside *directory*, or None.
+
+    The file name is the unquoted last segment of the URL path; the query
+    string is ignored.  None is returned when that segment is empty.
+    """
+    path = utils.unquote(urlsplit(url).path)
+    name = path[path.rfind('/') + 1:]
+    return join(directory, name) if name else None
+
+def _remote_mtime(response):
+    """Return the Last-Modified time of *response* in epoch seconds, or None."""
+    parsed = parsedate_tz(response.headers.get('Last-Modified', ''))
+    return None if parsed is None else mktime_tz(parsed)
+
+def _save_response(response, directory, filename):
+    """Write the body of *response* to *filename*, creating *directory*."""
+    create_filepath(directory)
+    with open(filename, 'wb') as handle:
+        for chunk in response.iter_content(1024):
+            handle.write(chunk)
+
+def download_files(session, files):
+    """Download every (url, directory) pair in *files* that is new or updated.
+
+    A missing file is fetched and reported with ``[+]``.  An existing file is
+    fetched again, and reported with ``[M]``, only if the server's
+    Last-Modified time is newer than the local modification time.  Responses
+    with a status other than 200 are skipped silently.
+    """
+    for f in files:
+        url, directory = f[0], f[1]
+        filename = _local_name(url, directory)
+        if filename is None:
+            # The URL path ends in '/', so there is no name to store it under.
+            continue
+        if not exists(filename):
+            marker = '[+] '
+        else:
+            head = session.head(url)
+            if head.status_code != 200:
+                continue
+            remote = _remote_mtime(head)
+            # Without a usable Last-Modified header the local copy is kept.
+            if remote is None or remote <= getmtime(filename):
+                continue
+            marker = '[M] '
+        response = session.get(url)
+        if response.status_code == 200:
+            _save_response(response, directory, filename)
+            print(marker + filename)
+
+def resolve_direct_links(session, hrefs):
+    """Return the redirect targets of the Moodle resource links in *hrefs*.
+
+    Links whose HEAD response has no Location header are left out.
+    """
+    links = []
+    for href in hrefs:
+        location = session.head(href).headers.get('Location')
+        if location:
+            links.append(location)
+    return links
+
+def get_file_links(session, url, files):
+    """Collect the links on the page at *url* that match *files*.
+
+    *files* is a sequence of (regex, directory) pairs.  The result is a list
+    of (absolute url, directory) pairs without duplicates, one for each link
+    that a regex matches.
+    """
+    response = session.get(url)
+
+    if MOODLE_HOST in url:
+        hrefs = findall(MOODLE_RESOURCE, response.text)
+        hrefs = resolve_direct_links(session, hrefs)
+    else:
+        hrefs = html.fromstring(response.text).xpath('//a/@href')
+
+    links = []
+    seen = set()
+    for f in files:
+        reg = compile(f[0])
+        for href in hrefs:
+            # urljoin keeps absolute links and resolves relative ones.
+            link = (urljoin(url, href), f[1])
+            if reg.search(href) and link not in seen:
+                seen.add(link)
+                links.append(link)
+
+    return links
+
+def establish_moodle_session(user, passwd):
+    """Return a session logged in to TUM Moodle via the Shibboleth login.
+
+    NOTE(review): the first form of the login response is assumed to carry
+    RelayState and SAMLResponse; a rejected login presumably fails here.
+    """
+    session = Session()
+
+    session.get('https://www.moodle.tum.de/Shibboleth.sso/Login?providerId=https://tumidp.lrz.de/idp/shibboleth&target=https://www.moodle.tum.de/auth/shibboleth/index.php')
+    response = session.post('https://tumidp.lrz.de/idp/Authn/UserPassword', data={'j_username':user, 'j_password':passwd})
+
+    parsed = html.fromstring(response.text)
+
+    session.post('https://www.moodle.tum.de/Shibboleth.sso/SAML2/POST', data={'RelayState':parsed.forms[0].fields['RelayState'], 'SAMLResponse':parsed.forms[0].fields['SAMLResponse']})
+
+    return session
+
+def main(url, files, user='', passwd=''):
+    """Download all files linked from *url* that match *files*.
+
+    *files* is a sequence of (regex, directory) pairs.  Moodle URLs go through
+    the Shibboleth login with *user* and *passwd*; any other URL uses HTTP
+    basic authentication, and only when *user* is given.
+    """
+    if MOODLE_HOST in url:
+        session = establish_moodle_session(user, passwd)
+    else:
+        session = Session()
+        if user:
+            session.auth = (user, passwd)
+        # update() keeps the default headers of a new requests session.
+        session.headers.update({'Accept-Language': 'de-DE,de;q=0.9'})
+
+    links = get_file_links(session, url, files)
+    download_files(session, links)
diff --git a/update.py.example b/update.py.example
new file mode 100755
index 0000000..6e016d3
--- /dev/null
+++ b/update.py.example
@@ -0,0 +1,32 @@
+#!/usr/bin/python3
+import tudown
+
+user = ''
+passwd = ''
+
+# +---------------+
+# | Lecture notes |
+# +---------------+
+
+url = 'http://wwwmayr.informatik.tu-muenchen.de/lehre/2015SS/theo/'
+
+files = [
+    (r'2015-theo\.pdf', 'Skript'),
+    (r'2015-\d{2}-\d{2}\.pdf', 'Skript'),
+]
+
+tudown.main(url, files)
+
+# +-----------+
+# | Exercises |
+# +-----------+
+
+url = 'http://wwwmayr.informatik.tu-muenchen.de/lehre/2015SS/theo/uebung/'
+
+files = [
+    (r'ue\d*\.pdf', 'Übungsblätter'),
+    (r'loesungen/lo\d*\.pdf', 'Lösungsblätter'),
+    (r'theo15zue\d*_druck\.pdf', 'Skript/ZÜ'),
+]
+
+tudown.main(url, files, user=user, passwd=passwd)