155 lines
4.7 KiB
Python
Executable File
155 lines
4.7 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import requests
|
|
import subprocess
|
|
from lxml import html
|
|
import json
|
|
import re
|
|
from requests import utils
|
|
from os.path import exists, getmtime
|
|
from os import makedirs
|
|
from calendar import timegm
|
|
from time import strptime, sleep
|
|
import sys
|
|
|
|
def create_filepath(filepath):
    """Make sure the directory *filepath* exists, creating parents as needed.

    Args:
        filepath: Path of the directory to create.

    Raises:
        FileExistsError: If *filepath* exists but is not a directory.
        OSError: If the directory cannot be created.
    """
    # exist_ok replaces the former "if not exists(...): makedirs(...)" pair,
    # which could fail when the directory appeared between the two calls.
    makedirs(filepath, exist_ok=True)
|
|
|
|
def _save_response(response, folder, filename, marker):
    """Write the body of *response* to *filename* and report it with *marker*."""
    create_filepath(folder)
    with open(filename, 'wb') as fd:
        for chunk in response.iter_content(1024):
            fd.write(chunk)
    print(marker + filename)


def download_files(session, f):
    """Download one file, or refresh the local copy if the server's is newer.

    The local file name is the target folder plus the last path segment of
    the URL-decoded address. A file that does not exist yet is fetched with
    GET. For an existing file a HEAD request is made and the file is fetched
    again only when the ``Last-Modified`` header is newer than the local
    modification time.

    Args:
        session: Object offering ``get``/``head`` in the style of a
            ``requests`` session.
        f: Two-item sequence ``(url, folder)``.

    A 301 response is followed manually so that the file name is derived from
    the redirect target. Any status other than 200 or 301 is ignored.
    """
    # Imported locally so this block does not depend on the file's import list.
    from urllib.parse import urljoin

    url, folder = f[0], f[1]
    decoded_url = utils.unquote(url)
    filename = folder + decoded_url[decoded_url.rindex('/'):]
    is_new = not exists(filename)

    # A new file needs its body right away; an existing one only its headers.
    if is_new:
        response = session.get(url, allow_redirects=False)
    else:
        response = session.head(url, allow_redirects=False)

    if response.status_code == 301:
        # Location may be relative; resolve it against the requested URL.
        target = urljoin(url, response.headers['Location'])
        download_files(session, (target, folder))
        return
    if response.status_code != 200:
        return

    if is_new:
        _save_response(response, folder, filename, '[+] ')
        return

    last_mod_file = getmtime(filename)
    try:
        last_mod_www = timegm(strptime(response.headers['Last-Modified'], '%a, %d %b %Y %H:%M:%S %Z'))
    except (KeyError, ValueError):
        # Header missing or not in the expected date format: the remote
        # timestamp is unknown, so keep the local copy.
        print('Can\'t check {} for updates.'.format(url))
        return

    if last_mod_www > last_mod_file:
        response = session.get(url)
        if response.status_code == 200:
            _save_response(response, folder, filename, '[M] ')
|
|
|
|
def get_moodle_session(username, password):
    """Open a session and run the Shibboleth login sequence for TUM Moodle.

    Args:
        username: Value posted as ``j_username``.
        password: Value posted as ``j_password``.

    Returns:
        The ``requests`` session on which the three login requests were made.
        The outcome of the login is not checked here.
    """
    moodle = requests.session()

    # 1) Request the login entry point; the URL of the resulting response is
    #    where the credentials get posted.
    login_page = moodle.get('https://www.moodle.tum.de/Shibboleth.sso/Login?providerId=https%3A%2F%2Ftumidp.lrz.de%2Fidp%2Fshibboleth&target=https%3A%2F%2Fwww.moodle.tum.de%2Fauth%2Fshibboleth%2Findex.php')

    # 2) Submit the credentials.
    credentials = {
        'j_username': username,
        'j_password': password,
        '_eventId_proceed': '',
    }
    idp_reply = moodle.post(login_page.url, data=credentials)

    # 3) The first form of the reply is expected to carry RelayState and
    #    SAMLResponse; forward both to Moodle.
    reply_form = html.fromstring(idp_reply.text).forms[0]
    assertion = {
        'RelayState': reply_form.fields['RelayState'],
        'SAMLResponse': reply_form.fields['SAMLResponse'],
    }
    moodle.post('https://www.moodle.tum.de/Shibboleth.sso/SAML2/POST', data=assertion)

    return moodle
|
|
|
|
def _run_credential_script(command):
    """Run ``[program, argument]`` and return its stripped stdout as bytes."""
    with subprocess.Popen([command[0], command[1]], stdout=subprocess.PIPE) as proc:
        return proc.stdout.read().strip()


def _load_credentials(j):
    """Return the ``(username, password)`` pair configured for resource *j*.

    Each of the four keys is looked at independently. Output of
    ``username_script`` / ``password_script`` is used first, and a literal
    ``username`` / ``password`` entry overrides it. Missing values stay None.
    """
    username = None
    password = None

    if 'username_script' in j:
        username = _run_credential_script(j['username_script'])
    if 'password_script' in j:
        password = _run_credential_script(j['password_script'])

    if 'username' in j:
        username = j['username']
    if 'password' in j:
        password = j['password']

    return username, password


def _resolve_links(session, base_url, hrefs):
    """Turn the raw ``href`` values of a page into absolute http(s) URLs."""
    # Imported locally so this block does not depend on the file's import list.
    from urllib.parse import urljoin, urlsplit

    moodle_resource = 'https://www.moodle.tum.de/mod/resource/view.php?id='
    links = []
    for href in hrefs:
        if href.startswith(moodle_resource):
            # These pages are expected to redirect to the actual file; ask for
            # the target without downloading it. If there is no redirect,
            # fall back to the link itself instead of raising KeyError.
            target = session.head(href).headers.get('Location')
            link = urljoin(href, target) if target else href
        else:
            # urljoin covers './', '../', '/', '//host/...' and absolute URLs.
            link = urljoin(base_url, href)

        # mailto:, javascript: and similar links cannot be downloaded.
        if urlsplit(link).scheme in ('http', 'https'):
            links.append(link)
    return links


def get_resource(j, verbose):
    """Download every matching file linked from one configured page.

    Args:
        j: Resource configuration mapping. Keys read here: ``url`` (page to
            scan), ``files`` (list of rules with a ``folder`` and an optional
            ``regex``), and optionally ``username``, ``password``,
            ``username_script``, ``password_script``.
        verbose: When true, print the resolved links and the raw links that
            contain ``.pdf``.

    Each link is handed to ``download_files`` with the folder of the first
    rule that matches it. A rule without ``regex`` matches every link.
    """
    username, password = _load_credentials(j)

    # establish a session
    if 'moodle.tum.de' in j['url']:
        session = get_moodle_session(username, password)
    else:
        session = requests.session()
        # Without any credentials leave auth unset; a (None, None) tuple would
        # still be treated by requests as basic-auth credentials.
        if username is not None or password is not None:
            session.auth = (username, password)

    # collect the links of the page
    r = session.get(j['url'])
    hrefs = html.fromstring(r.text).xpath('//a/@href')
    abs_hrefs = _resolve_links(session, j['url'], hrefs)

    if verbose:
        print(abs_hrefs)
        print([href for href in hrefs if '.pdf' in href])

    # download
    for href in abs_hrefs:
        for rule in j['files']:
            if 'regex' not in rule or re.search(rule['regex'], href):
                download_files(session, [href, rule['folder']])
                break
|
|
|
|
def main():
    """Read ``tudown.json`` from the working directory and process each entry.

    Passing ``-v`` as the first command-line argument turns on verbose output.
    The configuration is expected to be a JSON array; every element is passed
    to ``get_resource``.

    Raises:
        SystemExit: With status 1 when ``tudown.json`` does not exist.
        ValueError: When the file does not contain valid JSON.
    """
    if not exists('tudown.json'):
        print('tudown.json not found')
        sys.exit(1)

    # verbose mode
    verbose = len(sys.argv) > 1 and sys.argv[1] == '-v'

    # The with-block closes the file even when parsing fails. JSON is read as
    # UTF-8 so the result does not depend on the platform's locale encoding.
    with open('tudown.json', 'r', encoding='utf-8') as fh:
        j = json.load(fh)

    for resource in j:
        get_resource(resource, verbose)
|
|
|
|
|
|
# Start the downloader only when this file is executed as a script.
if __name__ == '__main__':
    main()
|