Add last version used with example configuration
This commit is contained in:
17
tudown.json
Normal file
17
tudown.json
Normal file
@@ -0,0 +1,17 @@
|
||||
[
|
||||
{
|
||||
"url": "https://www.moodle.tum.de/course/view.php?id=49093",
|
||||
"files": [
|
||||
{
|
||||
"regex": "Lec\\d+-.*\\.pdf",
|
||||
"folder": "slides"
|
||||
},
|
||||
{
|
||||
"regex": "A\\d+.*\\.pdf",
|
||||
"folder": "assignments"
|
||||
}
|
||||
],
|
||||
"username_script": ["/home/alex/Scripts/get-username.sh", "Uni/LRZ"],
|
||||
"password_script": ["/home/alex/Scripts/get-password.sh", "Uni/LRZ"]
|
||||
}
|
||||
]
|
||||
154
tudown.py
Executable file
154
tudown.py
Executable file
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/env python3
|
||||
import requests
|
||||
import subprocess
|
||||
from lxml import html
|
||||
import json
|
||||
import re
|
||||
from requests import utils
|
||||
from os.path import exists, getmtime
|
||||
from os import makedirs
|
||||
from calendar import timegm
|
||||
from time import strptime, sleep
|
||||
import sys
|
||||
|
||||
def create_filepath(filepath):
    """Make sure the directory ``filepath`` exists.

    Missing parent directories are created as well.  A path that already
    exists is left untouched.

    Args:
        filepath: Path of the directory to create.
    """
    if exists(filepath):
        return
    # exist_ok avoids a FileExistsError when the directory appears between
    # the check above and this call (e.g. a second run in parallel).
    makedirs(filepath, exist_ok=True)
|
||||
|
||||
def _save_response(response, folder, filename, marker):
    """Write the body of ``response`` to ``filename`` and report it.

    ``marker`` is the tag printed in front of the file name
    ('[+]' for a new file, '[M]' for a modified one).
    """
    create_filepath(folder)
    with open(filename, 'wb') as fd:
        for chunk in response.iter_content(1024):
            fd.write(chunk)
    print(marker + ' ' + filename)


def download_files(session, f):
    """Download one file, or refresh it when the server copy is newer.

    The local file name is the target folder plus the last path segment of
    the percent-decoded URL.  A file that does not exist locally is fetched
    with GET.  An existing file is probed with HEAD and only fetched again
    when the ``Last-Modified`` header is newer than the local mtime.

    Only permanent redirects (301) are followed, by calling this function
    again with the ``Location`` target.  Every other non-200 status is
    ignored silently.

    Args:
        session: Object offering ``get``/``head`` like a requests session.
        f: Two-item sequence ``(url, folder)``.

    Raises:
        ValueError: If the decoded URL contains no '/'.
    """
    url, folder = f[0], f[1]
    decoded = utils.unquote(url)
    filename = folder + decoded[decoded.rindex('/'):]

    if not exists(filename):
        response = session.get(url, allow_redirects=False)
        if response.status_code == 301:
            download_files(session, (response.headers['Location'], folder))
        elif response.status_code == 200:
            _save_response(response, folder, filename, '[+]')
        return

    # The file is already present: look at the headers before downloading.
    response = session.head(url, allow_redirects=False)
    if response.status_code == 301:
        download_files(session, (response.headers['Location'], folder))
        return
    if response.status_code != 200:
        return

    last_mod_file = getmtime(filename)
    try:
        last_mod_www = timegm(strptime(response.headers['Last-Modified'],
                                       '%a, %d %b %Y %H:%M:%S %Z'))
    except (KeyError, ValueError):
        # Header missing (KeyError) or not in the expected date format
        # (ValueError): keep the local copy instead of aborting the run.
        print('Can\'t check {} for updates.'.format(url))
        return

    if last_mod_www > last_mod_file:
        response = session.get(url)
        if response.status_code == 200:
            _save_response(response, folder, filename, '[M]')
|
||||
|
||||
def get_moodle_session(username, password):
    """Log in to moodle.tum.de and return the session used for the login.

    Three requests are sent on a fresh requests session:

    1. GET the Shibboleth login URL.
    2. POST the credentials to the URL that request ended up at.
    3. POST the ``RelayState`` and ``SAMLResponse`` values found in the
       first form of the reply to Moodle's SAML2 endpoint.

    Args:
        username: Value sent as ``j_username``.
        password: Value sent as ``j_password``.

    Returns:
        The session object the three requests were made with.
    """
    login_url = 'https://www.moodle.tum.de/Shibboleth.sso/Login?providerId=https%3A%2F%2Ftumidp.lrz.de%2Fidp%2Fshibboleth&target=https%3A%2F%2Fwww.moodle.tum.de%2Fauth%2Fshibboleth%2Findex.php'
    moodle = requests.session()

    # Step 1: open the login page; the final URL is the credential form.
    landing = moodle.get(login_url)

    # Step 2: submit the credentials to that form.
    credentials = {
        'j_username': username,
        'j_password': password,
        '_eventId_proceed': '',
    }
    idp_reply = moodle.post(landing.url, data=credentials)

    # Step 3: hand the fields of the returned form back to Moodle.
    saml_fields = html.fromstring(idp_reply.text).forms[0].fields
    assertion = {
        'RelayState': saml_fields['RelayState'],
        'SAMLResponse': saml_fields['SAMLResponse'],
    }
    moodle.post('https://www.moodle.tum.de/Shibboleth.sso/SAML2/POST', data=assertion)

    return moodle
|
||||
|
||||
def _read_script_output(command):
    """Run ``command[0]`` with argument ``command[1]``; return stripped stdout.

    The output is returned as read from the pipe (bytes).
    """
    with subprocess.Popen([command[0], command[1]], stdout=subprocess.PIPE) as proc:
        return proc.stdout.read().strip()


def _load_credentials(j):
    """Return ``(username, password)`` for the resource entry ``j``.

    Script-provided values are read first; plain 'username'/'password'
    entries override them.  A password source is only consulted when the
    matching username source is configured.  Values that are not
    configured stay ``None``.
    """
    username = None
    password = None

    if 'username_script' in j:
        username = _read_script_output(j['username_script'])
        if 'password_script' in j:
            password = _read_script_output(j['password_script'])

    if 'username' in j:
        username = j['username']
        if 'password' in j:
            password = j['password']

    return username, password


def _absolute_hrefs(session, base_url, hrefs):
    """Turn the ``hrefs`` found on the page ``base_url`` into absolute URLs.

    Moodle resource links are replaced by the ``Location`` target of a HEAD
    request; such links are skipped when the reply carries no ``Location``.
    """
    protocol, rest = base_url.split('://', 1)
    hostname = rest.split('/', 1)[0]
    root = protocol + '://' + hostname
    # Directory of the page; a URL without any path resolves against the root.
    directory = base_url.rsplit('/', 1)[0] if '/' in rest else root
    # '../' must never climb above the host itself.
    parent = directory.rsplit('/', 1)[0] if directory != root else root

    resolved = []
    for href in hrefs:
        # strip ./ from relative href
        if href.startswith('./'):
            href = href[2:]

        if href.startswith('https://www.moodle.tum.de/mod/resource/view.php?id='):
            location = session.head(href).headers.get('Location')
            if location is not None:
                resolved.append(location)
            continue

        if href.startswith('/'):
            resolved.append(root + href)
        elif href.startswith(('http://', 'https://')):
            resolved.append(href)
        elif href.startswith('../'):
            resolved.append(parent + '/' + href[3:])
        else:
            resolved.append(directory + '/' + href)
    return resolved


def get_resource(j, verbose):
    """Download every configured file linked from one resource page.

    Args:
        j: Resource entry from the configuration.  Reads 'url' and 'files'
            and, when present, 'username_script', 'password_script',
            'username' and 'password'.  Each item of 'files' needs a
            'folder' and may carry a 'regex'.
        verbose: When true, print all resolved links and the raw links
            containing '.pdf'.

    Each link is handed to ``download_files`` for the first 'files' item
    that has no 'regex' or whose 'regex' is found in the absolute link.
    """
    username, password = _load_credentials(j)

    # establish a session
    if 'moodle.tum.de' in j['url']:
        session = get_moodle_session(username, password)
    else:
        session = requests.session()
        # Without any credentials leave auth unset instead of sending
        # the literal pair (None, None).
        if username is not None or password is not None:
            session.auth = (username, password)

    # download
    r = session.get(j['url'])
    hrefs = html.fromstring(r.text).xpath('//a/@href')
    abs_hrefs = _absolute_hrefs(session, j['url'], hrefs)

    if verbose:
        print(abs_hrefs)
        print([href for href in hrefs if '.pdf' in href])

    for href in abs_hrefs:
        for spec in j['files']:
            if 'regex' not in spec or re.search(spec['regex'], href):
                download_files(session, [href, spec['folder']])
                break
|
||||
|
||||
def main():
    """Read ``tudown.json`` from the working directory and process it.

    Exits with status 1 when the file does not exist.  Passing ``-v`` as
    the first command line argument turns on verbose output.  Every entry
    of the top-level JSON list is handed to ``get_resource``.
    """
    if not exists('tudown.json'):
        print('tudown.json not found')
        sys.exit(1)

    # verbose mode
    verbose = sys.argv[1:2] == ['-v']

    # The with-block closes the file even when the JSON is invalid.
    with open('tudown.json', 'r', encoding='utf-8') as fh:
        j = json.load(fh)

    for resource in j:
        get_resource(resource, verbose)
|
||||
|
||||
|
||||
# Start the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user