new (and better) version.
This commit is contained in:
@@ -1,85 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
from requests import Session
|
||||
from lxml import html
|
||||
from re import compile, findall, match
|
||||
from os import makedirs
|
||||
from os.path import exists, getmtime
|
||||
from calendar import timegm
|
||||
from time import strptime
|
||||
|
||||
# Authentication modes understood by main(): plain session or Moodle login.
AUTH_NONE = 0
AUTH_MOODLE = 1

# Raw strings keep the regex escapes out of Python's own string-escape
# processing (non-raw '\:' / '\d' trigger invalid-escape warnings).
# Matches absolute links to Moodle resource pages with a six-digit id.
_regex = compile(r'https\:\/\/www\.moodle\.tum\.de\/mod\/resource\/view\.php\?id\=\d{6}')
# Matches any href containing one of the recognised file extensions.
_is_file = compile(r'\.(pdf|txt|py|c|jar)')
|
||||
|
||||
def create_filepath(filepath):
    """Make sure the directory ``filepath`` exists, creating parents as needed.

    An empty ``filepath`` is ignored instead of raising, since there is
    nothing to create.
    """
    if not filepath:
        return
    if not exists(filepath):
        # exist_ok closes the window between the exists() check and the
        # creation: a directory appearing in between is not an error.
        makedirs(filepath, exist_ok=True)
|
||||
|
||||
def download_file(session, url, filepath):
    """Download ``url`` into the directory ``filepath`` if it is new or changed.

    The local file name is the last path segment of ``url``. A file that does
    not exist yet is fetched and reported with ``[+]``. An existing file is
    re-fetched and reported with ``[M]`` only when the ``Last-Modified``
    header of a HEAD request is newer than the local modification time; when
    the server sends no such header the local copy is left untouched.
    """
    filename = filepath + url[url.rindex('/'):]

    if not exists(filename):
        marker = '[+] '
    else:
        stamp = session.head(url).headers.get('Last-Modified')
        if stamp is None:
            # Nothing to compare against, so keep what is already on disk.
            return
        last_mod_www = timegm(strptime(stamp, '%a, %d %b %Y %H:%M:%S %Z'))
        if last_mod_www <= getmtime(filename):
            return
        marker = '[M] '

    print(marker + filename)
    req = session.get(url)
    with open(filename, 'wb') as fh:
        # Explicit chunk size: iter_content() defaults to one byte per chunk.
        for chunk in req.iter_content(1024):
            fh.write(chunk)
|
||||
|
||||
def resolve_direct_link(session, url):
    """Return the redirect target the server announces for ``url``.

    Sends a HEAD request and returns its ``Location`` header. When the
    response carries no ``Location`` header, ``url`` itself is returned
    instead of raising ``KeyError``.
    """
    return session.head(url).headers.get('Location', url)
|
||||
|
||||
def get_file_links(session, url, base=''):
    """Collect file links from the page at ``url``.

    For URLs starting with the Moodle course prefix, every resource link
    found in the page text is resolved through ``resolve_direct_link``.
    For any other URL, the ``href`` of each anchor is kept when it contains
    a recognised file extension, and ``base`` is prepended to it.
    """
    page = session.get(url).text

    if url.startswith('https://www.moodle.tum.de/course/'):
        return [resolve_direct_link(session, found) for found in findall(_regex, page)]

    anchors = html.fromstring(page).xpath('//a/@href')
    # search() succeeds exactly when findall() would return a non-empty list.
    return [base + href for href in anchors if _is_file.search(href)]
|
||||
|
||||
def get_moodle_session(user, passwd):
    """Return a session that has gone through the Moodle Shibboleth login.

    Steps performed: open the Shibboleth login URL, post the credentials to
    the identity provider, then forward the ``RelayState`` and
    ``SAMLResponse`` fields of the first form in the reply to Moodle.
    """
    login_url = 'https://www.moodle.tum.de/Shibboleth.sso/Login?providerId=https://tumidp.lrz.de/idp/shibboleth&target=https://www.moodle.tum.de/auth/shibboleth/index.php'
    idp_url = 'https://tumidp.lrz.de/idp/Authn/UserPassword'
    saml_url = 'https://www.moodle.tum.de/Shibboleth.sso/SAML2/POST'

    moodle = Session()
    moodle.get(login_url)

    credentials = {'j_username': user, 'j_password': passwd}
    idp_reply = moodle.post(idp_url, data=credentials)

    # The reply page carries the SAML hand-over values in its first form.
    document = html.fromstring(idp_reply.text)
    hand_over = {
        'RelayState': document.forms[0].fields['RelayState'],
        'SAMLResponse': document.forms[0].fields['SAMLResponse'],
    }
    moodle.post(saml_url, data=hand_over)

    return moodle
|
||||
|
||||
def main(mode, url, files, user='', passwd='', base=''):
    """Download every file linked from ``url`` that matches an entry of ``files``.

    ``mode`` selects the session: ``AUTH_MOODLE`` logs in with ``user`` and
    ``passwd``; any other value uses a plain session. Each entry of ``files``
    is indexed as ``(regex, directory)``: a link matching ``regex`` is
    downloaded into ``directory``, which is created on demand. ``base`` is
    passed through to ``get_file_links``.
    """
    if mode == AUTH_MOODLE:
        session = get_moodle_session(user, passwd)
    else:
        session = Session()

    links = get_file_links(session, url, base)

    # Compile each pattern once rather than once per (link, pattern) pair.
    targets = [(compile(ft[0]), ft[1]) for ft in files]

    for link in links:
        for pattern, directory in targets:
            if pattern.search(link):
                create_filepath(directory)
                download_file(session, link, directory)
|
||||
97
tudown.py
Normal file
97
tudown.py
Normal file
@@ -0,0 +1,97 @@
|
||||
#!/usr/bin/python3
|
||||
from requests import Session, utils
|
||||
from lxml import html
|
||||
from re import compile, findall, match
|
||||
from os import makedirs
|
||||
from os.path import exists, getmtime
|
||||
from calendar import timegm
|
||||
from time import strptime
|
||||
|
||||
def create_filepath(filepath):
    """Make sure the directory ``filepath`` exists, creating parents as needed.

    An empty ``filepath`` is ignored instead of raising, since there is
    nothing to create.
    """
    if not filepath:
        return
    if not exists(filepath):
        # exist_ok closes the window between the exists() check and the
        # creation: a directory appearing in between is not an error.
        makedirs(filepath, exist_ok=True)
|
||||
|
||||
def download_files(session, files):
    """Download each ``(url, directory)`` entry of ``files`` that is new or changed.

    The local file name is the last path segment of the percent-decoded URL.
    A file that does not exist yet is fetched and reported with ``[+]``. An
    existing file is re-fetched and reported with ``[M]`` only when a HEAD
    request answers 200 with a ``Last-Modified`` header newer than the local
    modification time. Entries whose requests do not answer 200, or whose
    HEAD response lacks ``Last-Modified``, are skipped.
    """
    for entry in files:
        url, directory = entry[0], entry[1]
        decoded = utils.unquote(url)
        filename = directory + decoded[decoded.rindex('/'):]

        if not exists(filename):
            marker = '[+] '
        else:
            response = session.head(url)
            if response.status_code != 200:
                continue
            stamp = response.headers.get('Last-Modified')
            if stamp is None:
                # Nothing to compare against, so keep the local copy.
                continue
            last_mod_www = timegm(strptime(stamp, '%a, %d %b %Y %H:%M:%S %Z'))
            if last_mod_www <= getmtime(filename):
                continue
            marker = '[M] '

        response = session.get(url)
        if response.status_code == 200:
            create_filepath(directory)
            # A distinct handle name keeps the loop's entry from being rebound.
            with open(filename, 'wb') as fh:
                for chunk in response.iter_content(1024):
                    fh.write(chunk)
            print(marker + filename)
|
||||
|
||||
def resolve_direct_links(session, hrefs):
    """Return the redirect target for every URL in ``hrefs``, in order.

    Each URL is probed with a HEAD request and replaced by the response's
    ``Location`` header. A URL whose response has no ``Location`` header is
    kept unchanged, so one non-redirecting link no longer aborts the run
    with ``KeyError``.
    """
    return [session.head(href).headers.get('Location', href) for href in hrefs]
|
||||
|
||||
def get_file_links(session, url, files):
    """Return ``(link, directory)`` pairs for the hrefs on ``url`` that match ``files``.

    For Moodle URLs the resource links found in the page text are resolved
    through ``resolve_direct_links``; for every other URL the ``href`` of each
    anchor is used. Each entry of ``files`` is indexed as
    ``(regex, directory)``; every href matching ``regex`` is turned into an
    absolute link relative to ``url`` and paired with ``directory``.
    """
    # Local import so the block does not rely on a new file-level import.
    from urllib.parse import urljoin

    response = session.get(url)

    if 'www.moodle.tum.de' in url:
        hrefs = findall(r'https\:\/\/www\.moodle\.tum\.de\/mod\/resource\/view\.php\?id\=\d{6}', response.text)
        hrefs = resolve_direct_links(session, hrefs)
    else:
        hrefs = html.fromstring(response.text).xpath('//a/@href')

    links = []
    for entry in files:
        pattern = compile(entry[0])
        for href in hrefs:
            if pattern.search(href):
                # urljoin leaves absolute hrefs alone and resolves relative
                # and root-relative ones correctly, unlike plain url + href.
                links.append((urljoin(url, href), entry[1]))

    return links
|
||||
|
||||
def establish_moodle_session(user, passwd):
    """Return a session that has gone through the Moodle Shibboleth login.

    Steps performed: open the Shibboleth login URL, post the credentials to
    the identity provider, then forward the ``RelayState`` and
    ``SAMLResponse`` fields of the first form in the reply to Moodle.
    """
    login_url = 'https://www.moodle.tum.de/Shibboleth.sso/Login?providerId=https://tumidp.lrz.de/idp/shibboleth&target=https://www.moodle.tum.de/auth/shibboleth/index.php'
    idp_url = 'https://tumidp.lrz.de/idp/Authn/UserPassword'
    saml_url = 'https://www.moodle.tum.de/Shibboleth.sso/SAML2/POST'

    moodle = Session()
    moodle.get(login_url)

    credentials = {'j_username': user, 'j_password': passwd}
    idp_reply = moodle.post(idp_url, data=credentials)

    # The reply page carries the SAML hand-over values in its first form.
    document = html.fromstring(idp_reply.text)
    hand_over = {
        'RelayState': document.forms[0].fields['RelayState'],
        'SAMLResponse': document.forms[0].fields['SAMLResponse'],
    }
    moodle.post(saml_url, data=hand_over)

    return moodle
|
||||
|
||||
def main(url, files, user='', passwd=''):
    """Download every file linked from ``url`` that matches an entry of ``files``.

    Moodle URLs get a logged-in session built from ``user`` and ``passwd``;
    every other URL gets a plain session carrying the credentials as request
    auth plus a German ``Accept-Language`` header. ``files`` is handed to
    ``get_file_links`` and the resulting links to ``download_files``.
    """
    # NOTE(review): the original indentation was lost; the auth and header
    # setup is assumed to belong to the non-Moodle branch - confirm.
    if 'www.moodle.tum.de' in url:
        session = establish_moodle_session(user, passwd)
    else:
        session = Session()
        session.auth = (user, passwd)
        # update() adds the header while keeping the session's defaults;
        # assigning a new dict would discard them.
        session.headers.update({
            "Accept-Language": "de-DE,de;"
        })

    links = get_file_links(session, url, files)
    download_files(session, links)
|
||||
32
update.py.example
Executable file
32
update.py.example
Executable file
@@ -0,0 +1,32 @@
|
||||
#!/usr/bin/python3
"""Example update script: list the pages to scan and the files to fetch.

Each entry of ``files`` is ``(regex, directory)``: links matching the regex
are downloaded into that directory.
"""
import tudown

user = ''
passwd = ''

# +---------------+
# | Lecture notes |
# +---------------+

url = 'http://wwwmayr.informatik.tu-muenchen.de/lehre/2015SS/theo/'

# Raw strings keep regex escapes such as \. and \d intact.
files = [
    (r'2015-theo\.pdf', 'Skript'),
    (r'2015-\d{2}-\d{2}\.pdf', 'Skript'),
]

tudown.main(url, files)

# +-----------+
# | Exercises |
# +-----------+

url = 'http://wwwmayr.informatik.tu-muenchen.de/lehre/2015SS/theo/uebung/'

files = [
    (r'ue\d*\.pdf', 'Übungsblätter'),
    (r'loesungen/lo\d*\.pdf', 'Lösungsblätter'),
    (r'theo15zue\d*_druck\.pdf', 'Skript/ZÜ'),
]

tudown.main(url, files, user=user, passwd=passwd)
|
||||
Reference in New Issue
Block a user