Merge branch 'genericname462-master'

This commit is contained in:
Alexander Weidinger
2015-04-23 15:59:09 +02:00
2 changed files with 120 additions and 89 deletions

View File

@@ -1,26 +1,30 @@
#!/usr/bin/python3
from threading import Thread
import threading
from requests import Session, utils
from lxml import html
from re import compile, findall, match
from re import compile, findall
from os import makedirs
from os.path import exists, getmtime
from calendar import timegm
from time import strptime
from time import strptime, clock, time, sleep
# main() waits while threading.active_count() exceeds this value before
# starting another download thread.
NUM_THREADS = 5
def create_filepath(filepath):
    """Create the directory ``filepath`` (with missing parents) if absent.

    ``download_files`` calls this from several threads at once, so another
    thread may create the directory between the ``exists`` check and the
    ``makedirs`` call. ``exist_ok=True`` makes that case a no-op instead of
    a ``FileExistsError``. A path that already exists is left untouched.

    :param filepath: directory path to create.
    """
    if not exists(filepath):
        makedirs(filepath, exist_ok=True)
def download_files(session, files):
for f in files:
def download_files(session, f):
filename = f[1] + utils.unquote(f[0])[utils.unquote(f[0]).rindex('/'):]
if not exists(filename):
response = session.get(f[0])
if response.status_code == 200:
create_filepath(f[1])
with open(filename, 'wb') as f:
with open(filename, 'wb') as fd:
for chunk in response.iter_content(1024):
f.write(chunk)
fd.write(chunk)
print('[+] ' + filename)
else:
response = session.head(f[0])
@@ -31,26 +35,35 @@ def download_files(session, files):
response = session.get(f[0])
if response.status_code == 200:
create_filepath(f[1])
with open(filename, 'wb') as f:
with open(filename, 'wb') as fd:
for chunk in response.iter_content(1024):
f.write(chunk)
fd.write(chunk)
print('[M] ' + filename)
def resolve_direct_links(session, hrefs):
    """Collect the ``Location`` header of each href's HEAD response.

    :param session: object whose ``head(url)`` returns a response with a
        ``headers`` mapping (a ``requests`` session elsewhere in this file).
    :param hrefs: iterable of URLs to probe.
    :return: list of ``Location`` values in the order of ``hrefs``; hrefs
        whose response has no ``Location`` header are skipped.
    """
    links = []
    for href in hrefs:
        headers = session.head(href).headers
        if 'Location' in headers:
            links.append(headers['Location'])
    return links
def get_links_from_folder(session, urls):
    """Fetch each folder page and return the Moodle file links found in it.

    :param session: object whose ``get(url)`` returns a response with a
        ``text`` attribute (a ``requests`` session elsewhere in this file).
    :param urls: iterable of folder page URLs.
    :return: list of every ``pluginfile.php/<6 digits>/mod_folder/content/0/``
        URL matched in the page texts, in page order, once per occurrence.
    """
    # Compiled once, outside the loop. Raw strings keep the backslashes
    # without relying on Python passing unknown escapes through.
    pattern = compile(
        r'https\:\/\/www\.moodle\.tum\.de\/pluginfile\.php\/\d{6}'
        r'\/mod_folder\/content\/0\/(?:[\w\d\_\-]*\/)*[\w\d\_\-\.]{1,}')
    hrefs = []
    for url in urls:
        response = session.get(url)
        hrefs += pattern.findall(response.text)
    return hrefs
def get_file_links(session, url, files):
links = []
@@ -69,7 +82,7 @@ def get_file_links(session, url, files):
hrefs = html.fromstring(response.text).xpath('//a/@href')
# ---------------
t = clock()
for f in files:
reg = compile(f[0])
for href in hrefs:
@@ -79,24 +92,30 @@ def get_file_links(session, url, files):
links.append((url + href, f[1]))
else:
links.append((href, f[1]))
#print("delta regex:", clock() - t)
return links
def establish_moodle_session(user, passwd):
    """Log in to www.moodle.tum.de via the tumidp.lrz.de IdP.

    Each of the three requests is issued exactly once, in this order:
    start the Shibboleth login, post the credentials to the IdP, then
    forward the ``RelayState`` / ``SAMLResponse`` form fields from the
    IdP's reply to Moodle's SAML2 endpoint.

    :param user: value sent as ``j_username``.
    :param passwd: value sent as ``j_password``.
    :return: the ``Session`` used for all three requests.
    :raises IndexError: if the IdP reply contains no form.
    :raises KeyError: if that form lacks one of the two expected fields.
        (Presumably both mean the login was rejected; confirm against the
        IdP's actual failure page.)
    """
    session = Session()
    session.get(
        'https://www.moodle.tum.de/Shibboleth.sso/Login'
        '?providerId=https://tumidp.lrz.de/idp/shibboleth'
        '&target=https://www.moodle.tum.de/auth/shibboleth/index.php')
    response = session.post(
        'https://tumidp.lrz.de/idp/Authn/UserPassword',
        data={'j_username': user, 'j_password': passwd})
    # The first form of the IdP reply carries the fields Moodle expects.
    form_fields = html.fromstring(response.text).forms[0].fields
    session.post(
        'https://www.moodle.tum.de/Shibboleth.sso/SAML2/POST',
        data={'RelayState': form_fields['RelayState'],
              'SAMLResponse': form_fields['SAMLResponse']})
    return session
def main(url, files, user='', passwd=''):
# create session
session = None
t = clock()
if 'www.moodle.tum.de' in url:
session = establish_moodle_session(user, passwd)
else:
@@ -105,9 +124,21 @@ def main(url, files, user='', passwd=''):
session.headers = {
"Accept-Language": "de-DE,de;"
}
#print("delta session:", clock() - t)
# get file links
links = get_file_links(session, url, files)
# download files
download_files(session, links)
#print(threading.active_count())
t1 = clock()
worker = []
for l in links:
while threading.active_count() > NUM_THREADS:
sleep(0.1)
worker.append(Thread(target=download_files, args=(session, l)).start())
[t.join() for t in worker if t]
#print("delta download threaded:", clock() - t1)