#!/usr/bin/env python3
import requests
import subprocess
from lxml import html
import json
import re
from requests import utils
from os.path import exists, getmtime
from os import makedirs
from calendar import timegm
from time import strptime, sleep
import sys

def create_filepath(filepath):
    """Create the directory ``filepath`` (and any missing parents).

    Nothing happens when ``filepath`` already exists. ``exist_ok=True``
    closes the window between the ``exists`` check and ``makedirs``: if
    the directory is created by someone else in between, the call no
    longer fails with ``FileExistsError``.
    """
    if not exists(filepath):
        makedirs(filepath, exist_ok=True)

def _save_response(response, folder, filename, tag):
    """Write the body of ``response`` to ``filename`` and report it.

    ``folder`` is created first if it does not exist yet. ``tag`` is the
    prefix printed in front of the file name (``'[+] '`` for a new file,
    ``'[M] '`` for a refreshed one).
    """
    create_filepath(folder)
    with open(filename, 'wb') as fd:
        for chunk in response.iter_content(1024):
            fd.write(chunk)
    print(tag + filename)


def download_files(session, f, max_redirects=10):
    """Download one URL into a folder, or refresh the local copy.

    Args:
        session: object offering ``get``/``head`` like a requests session.
        f: pair ``(url, folder)``.
        max_redirects: how many permanent redirects may still be followed
            before giving up (guards against redirect loops).

    The local file name is ``folder`` plus everything from the last ``/``
    of the percent-decoded URL. If that file does not exist it is fetched
    with GET; otherwise a HEAD request is made and the file is fetched
    again only when the server's ``Last-Modified`` is newer than the
    local modification time.
    """
    # Function-scope import keeps the block self-contained.
    from urllib.parse import urljoin

    url, folder = f[0], f[1]
    decoded_url = utils.unquote(url)
    filename = folder + decoded_url[decoded_url.rindex('/'):]
    already_present = exists(filename)

    if already_present:
        response = session.head(url, allow_redirects=False)
    else:
        response = session.get(url, allow_redirects=False)

    # Permanent redirects: restart with the new URL so the local file name
    # is derived from the final location.
    # NOTE(review): temporary redirects (302/303/307) are still ignored, as
    # in the original code -- confirm whether they should be followed.
    if response.status_code in (301, 308):
        location = response.headers.get('Location')
        if location is None or max_redirects <= 0:
            print('Can\'t follow redirect for {}.'.format(url))
            return
        # Location may be relative; resolve it against the current URL.
        download_files(session, (urljoin(url, location), folder),
                       max_redirects - 1)
        return

    if response.status_code != 200:
        return

    if not already_present:
        _save_response(response, folder, filename, '[+] ')
        return

    last_mod_file = getmtime(filename)
    try:
        last_mod_www = timegm(strptime(response.headers['Last-Modified'],
                                       '%a, %d %b %Y %H:%M:%S %Z'))
    except (KeyError, ValueError):
        # Header missing or not in the expected date format: treat the
        # local copy as up to date instead of crashing.
        print('Can\'t check {} for updates.'.format(url))
        last_mod_www = last_mod_file

    if last_mod_www > last_mod_file:
        response = session.get(url)
        if response.status_code == 200:
            _save_response(response, folder, filename, '[M] ')

def get_moodle_session(username, password):
    """Log in to TUM Moodle through Shibboleth and return the session.

    Steps, all on one ``requests`` session:
      1. GET the Shibboleth login entry point.
      2. POST the credentials to the URL that request ended up at.
      3. Parse the reply as HTML and read ``RelayState`` and
         ``SAMLResponse`` from its first form.
      4. POST those two values to Moodle's SAML2 endpoint.

    The reply in step 3 must contain a form with both fields; if it does
    not (presumably a failed login -- not verified here), the lookup
    raises.
    """
    login_entry = 'https://www.moodle.tum.de/Shibboleth.sso/Login?providerId=https%3A%2F%2Ftumidp.lrz.de%2Fidp%2Fshibboleth&target=https%3A%2F%2Fwww.moodle.tum.de%2Fauth%2Fshibboleth%2Findex.php'
    saml_endpoint = 'https://www.moodle.tum.de/Shibboleth.sso/SAML2/POST'

    session = requests.session()

    login_page = session.get(login_entry)
    credentials = {
        'j_username': username,
        'j_password': password,
        '_eventId_proceed': '',
    }
    idp_reply = session.post(login_page.url, data=credentials)

    saml_fields = html.fromstring(idp_reply.text).forms[0].fields
    assertion = {
        'RelayState': saml_fields['RelayState'],
        'SAMLResponse': saml_fields['SAMLResponse'],
    }
    session.post(saml_endpoint, data=assertion)

    return session

def _run_credential_script(command):
    """Run ``command[0]`` with the argument ``command[1]``.

    Returns the process' standard output with surrounding whitespace
    stripped (as bytes, since the pipe is read in binary mode).
    """
    with subprocess.Popen([command[0], command[1]],
                          stdout=subprocess.PIPE) as proc:
        return proc.stdout.read().strip()


def _read_credentials(j):
    """Return ``(username, password)`` for the resource configuration ``j``.

    Script-provided values are read first; literal ``username`` /
    ``password`` entries override them. Each pair is processed in order,
    so a missing key stops that pair at the point where it is missing.
    Values that are not configured stay ``None``.
    """
    username = None
    password = None

    try:
        username = _run_credential_script(j['username_script'])
        password = _run_credential_script(j['password_script'])
    except KeyError:
        pass

    try:
        username = j['username']
        password = j['password']
    except KeyError:
        pass

    return username, password


def _absolute_hrefs(session, base_url, hrefs):
    """Turn the ``href`` values found on ``base_url`` into absolute URLs.

    Moodle resource links are replaced by the ``Location`` they redirect
    to. Everything else is resolved against ``base_url`` with the
    standard URL-joining rules. Links that cannot name a downloadable
    file (empty, fragment-only, or not http/https) are dropped.
    """
    # Function-scope import keeps the block self-contained.
    from urllib.parse import urljoin, urlsplit

    moodle_resource = 'https://www.moodle.tum.de/mod/resource/view.php?id='
    resolved = []

    for href in hrefs:
        # Empty and fragment-only links point back at the page itself.
        if not href or href.startswith('#'):
            continue

        if href.startswith(moodle_resource):
            location = session.head(href).headers.get('Location')
            # No redirect target means there is no file URL to record.
            if location is not None:
                resolved.append(urljoin(href, location))
            continue

        absolute = urljoin(base_url, href)
        # Skip mailto:, javascript: and similar non-downloadable links.
        if urlsplit(absolute).scheme in ('http', 'https'):
            resolved.append(absolute)

    return resolved


def get_resource(j, verbose):
    """Download every matching file linked from one configured page.

    Args:
        j: resource configuration with at least ``url`` and ``files``;
            optional ``username``/``password`` or
            ``username_script``/``password_script`` entries supply
            credentials. Each entry of ``files`` has a ``folder`` and an
            optional ``regex``.
        verbose: when true, print the resolved links and the raw links
            containing ``.pdf``.

    For each link, the first ``files`` entry that has no ``regex`` or
    whose ``regex`` matches the link decides the target folder.
    """
    username, password = _read_credentials(j)

    # establish a session
    if 'moodle.tum.de' in j['url']:
        session = get_moodle_session(username, password)
    else:
        session = requests.session()
        # Only attach basic auth when credentials were actually
        # configured; (None, None) would otherwise be sent as credentials.
        if username is not None or password is not None:
            session.auth = (username, password)

    # download
    r = session.get(j['url'])
    hrefs = html.fromstring(r.text).xpath('//a/@href')
    abs_hrefs = _absolute_hrefs(session, j['url'], hrefs)

    if verbose:
        print(abs_hrefs)
        print([href for href in hrefs if '.pdf' in href])

    for href in abs_hrefs:
        for file in j['files']:
            if 'regex' not in file or re.search(file['regex'], href):
                download_files(session, [href, file['folder']])
                break

def main():
    """Read ``tudown.json`` from the current directory and process it.

    Exits with status 1 when the file is missing. Passing ``-v`` as the
    first command-line argument turns on verbose output. Every entry of
    the JSON document is handed to ``get_resource``.
    """
    if not exists('tudown.json'):
        print('tudown.json not found')
        sys.exit(1)

    # verbose mode
    verbose = len(sys.argv) > 1 and sys.argv[1] == '-v'

    # load the download configuration; the context manager closes the
    # file even when the JSON is invalid
    with open('tudown.json', 'r') as fh:
        j = json.load(fh)

    for resource in j:
        get_resource(resource, verbose)


# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
	main()