Add last version used with example configuration

This commit is contained in:
Alexander Weidinger
2020-05-18 16:27:41 +02:00
commit a8d20b732a
2 changed files with 171 additions and 0 deletions

17
tudown.json Normal file
View File

@@ -0,0 +1,17 @@
[
{
"url": "https://www.moodle.tum.de/course/view.php?id=49093",
"files": [
{
"regex": "Lec\\d+-.*\\.pdf",
"folder": "slides"
},
{
"regex": "A\\d+.*\\.pdf",
"folder": "assignments"
}
],
"username_script": ["/home/alex/Scripts/get-username.sh", "Uni/LRZ"],
"password_script": ["/home/alex/Scripts/get-password.sh", "Uni/LRZ"]
}
]

154
tudown.py Executable file
View File

@@ -0,0 +1,154 @@
#!/usr/bin/env python3
import requests
import subprocess
from lxml import html
import json
import re
from requests import utils
from os.path import exists, getmtime
from os import makedirs
from calendar import timegm
from time import strptime, sleep
import sys
def create_filepath(filepath):
    """Ensure that the directory *filepath* exists.

    Missing parent directories are created as well. Calling this for a
    directory that already exists is a no-op.

    Raises:
        FileExistsError: if *filepath* exists but is not a directory.
    """
    # exist_ok=True replaces the former "check, then create" sequence, which
    # could fail if the directory appeared between the two calls.
    makedirs(filepath, exist_ok=True)
def _save_response(response, folder, filename, marker):
    """Write the body of *response* to *filename* and print *marker* + name."""
    create_filepath(folder)
    with open(filename, 'wb') as fd:
        for chunk in response.iter_content(1024):
            fd.write(chunk)
    print(marker + ' ' + filename)


def download_files(session, f):
    """Download one file, or refresh it when the server copy is newer.

    Args:
        session: object offering ``get``/``head`` like a ``requests`` session.
        f: pair ``(url, folder)``; the local file name is *folder* followed by
            the last path segment of the unquoted URL.

    A new file is reported with ``[+]``, a refreshed one with ``[M]``.
    A 301 answer is followed manually so that the file name is derived from
    the redirect target; any status other than 200/301 is ignored.
    """
    url, folder = f[0], f[1]
    decoded = utils.unquote(url)
    filename = folder + decoded[decoded.rindex('/'):]

    if not exists(filename):
        response = session.get(url, allow_redirects=False)
        if response.status_code == 301:
            download_files(session, (response.headers['Location'], folder))
        elif response.status_code == 200:
            _save_response(response, folder, filename, '[+]')
        return

    # The file is already present: only ask for the headers first.
    response = session.head(url, allow_redirects=False)
    if response.status_code == 301:
        download_files(session, (response.headers['Location'], folder))
        return
    if response.status_code != 200:
        return

    last_mod_file = getmtime(filename)
    try:
        last_mod_www = timegm(strptime(response.headers['Last-Modified'], '%a, %d %b %Y %H:%M:%S %Z'))
    except (KeyError, ValueError):
        # Header missing (KeyError) or not in the expected date format
        # (ValueError): treat the local copy as up to date instead of
        # aborting the whole run.
        print('Can\'t check {} for updates.'.format(url))
        last_mod_www = last_mod_file

    if last_mod_www > last_mod_file:
        response = session.get(url)
        if response.status_code == 200:
            _save_response(response, folder, filename, '[M]')
def get_moodle_session(username, password):
    """Return a ``requests`` session after posting the login forms for Moodle.

    Three requests are issued in order: fetch the Shibboleth login URL, post
    the credentials to whatever URL that request ended up at, then post the
    ``RelayState`` and ``SAMLResponse`` fields of the first form in the reply
    to Moodle's SAML2 endpoint. No check is made that the login succeeded.
    """
    login_url = 'https://www.moodle.tum.de/Shibboleth.sso/Login?providerId=https%3A%2F%2Ftumidp.lrz.de%2Fidp%2Fshibboleth&target=https%3A%2F%2Fwww.moodle.tum.de%2Fauth%2Fshibboleth%2Findex.php'
    saml_url = 'https://www.moodle.tum.de/Shibboleth.sso/SAML2/POST'

    moodle = requests.session()
    login_page = moodle.get(login_url)

    credentials = {'j_username':username, 'j_password':password, '_eventId_proceed':''}
    idp_reply = moodle.post(login_page.url, data=credentials)

    # The reply carries a form whose hidden fields must be handed back.
    saml_form = html.fromstring(idp_reply.text).forms[0]
    assertion = {
        'RelayState': saml_form.fields['RelayState'],
        'SAMLResponse': saml_form.fields['SAMLResponse'],
    }
    moodle.post(saml_url, data=assertion)
    return moodle
def get_resource(j, verbose):
    """Fetch one configured page and download every link matching its rules.

    Args:
        j: one entry of the configuration list. Reads ``url`` and ``files``
            (each rule has ``folder`` and optionally ``regex``), and the
            optional ``username_script``/``password_script`` (command plus
            one argument) or ``username``/``password`` keys.
        verbose: when true, print the resolved links and the raw links that
            contain ``.pdf``.
    """
    username = None
    password = None
    # Credentials from helper scripts; a missing key skips the rest of the
    # block. The values are the scripts' raw (bytes) output, stripped.
    try:
        with subprocess.Popen([j['username_script'][0], j['username_script'][1]], stdout=subprocess.PIPE) as proc:
            username = proc.stdout.read().strip()
        with subprocess.Popen([j['password_script'][0], j['password_script'][1]], stdout=subprocess.PIPE) as proc:
            password = proc.stdout.read().strip()
    except KeyError:
        pass
    # Plain-text credentials, when present, override the script results.
    try:
        username = j['username']
        password = j['password']
    except KeyError:
        pass

    # establish a session
    if 'moodle.tum.de' in j['url']:
        session = get_moodle_session(username, password)
    else:
        session = requests.session()
        session.auth = (username, password)

    # separate url; only the part before the first '/' is needed, so a URL
    # without any path no longer fails to unpack
    protocol, rest = j['url'].split('://', 1)
    hostname = rest.split('/', 1)[0]
    # bases used to resolve relative links
    parent_url = j['url'].rsplit('/', 1)[0]
    grandparent_url = parent_url.rsplit('/', 1)[0]

    # download the page and collect its links
    r = session.get(j['url'])
    hrefs = html.fromstring(r.text).xpath('//a/@href')
    abs_hrefs = []
    for href in hrefs:
        # strip ./ from relative href
        if href.startswith('./'):
            href = href[2:]
        if href.startswith('https://www.moodle.tum.de/mod/resource/view.php?id='):
            # The file URL is taken from the redirect target of a HEAD
            # request; skip the link when the answer has no redirect.
            location = session.head(href).headers.get('Location')
            if location is None:
                print('Can\'t resolve {}.'.format(href))
            else:
                abs_hrefs.append(location)
        elif href.startswith('/'):
            abs_hrefs.append(protocol + '://' + hostname + href)
        elif href.startswith(('http://', 'https://')):
            abs_hrefs.append(href)
        elif href.startswith('../'):
            abs_hrefs.append(grandparent_url + '/' + href[3:])
        else:
            abs_hrefs.append(parent_url + '/' + href)

    if verbose:
        print(abs_hrefs)
        print([href for href in hrefs if '.pdf' in href])

    for href in abs_hrefs:
        for rule in j['files']:
            # The first rule that applies wins; a rule without 'regex'
            # matches every link.
            if 'regex' not in rule or re.search(rule['regex'], href):
                download_files(session, [href, rule['folder']])
                break
def main():
    """Read ``tudown.json`` from the working directory and process each entry.

    Exits with status 1 when the configuration file does not exist. Passing
    ``-v`` as the first command-line argument enables verbose output.
    """
    if not exists('tudown.json'):
        print('tudown.json not found')
        sys.exit(1)

    # verbose mode
    verbose = len(sys.argv) > 1 and sys.argv[1] == '-v'

    # open download configuration into json; the with-block closes the file
    # even when parsing fails
    with open('tudown.json', 'r') as fh:
        j = json.loads(fh.read())

    for resource in j:
        get_resource(resource, verbose)
# Run the downloader only when this file is executed as a script.
if __name__ == '__main__':
    main()