# llcd.py
# Forked from mypan/linkedin_learning_courses_downloader.
import os
import urllib
import sys
import config
import requests
import re
import tempfile
import time
from bs4 import BeautifulSoup
from importlib import reload
from urllib.request import urlopen
from youtube_dl.utils import YoutubeDLCookieJar
try:
from http.cookiejar import CookieJar
except ImportError:
from cookielib import CookieJar
def login():
    """Return the LinkedIn ``li_at`` session cookie stored in ``./cookies.txt``.

    The cookie file is loaded with ``YoutubeDLCookieJar``; expired and
    session-only cookies are kept (``ignore_expires`` / ``ignore_discard``).

    Returns:
        The value of the ``li_at`` cookie registered for domain
        ``.www.linkedin.com`` and path ``/``.

    Raises:
        SystemExit: with an explanatory message (non-zero status) when the
            jar holds no such cookie.
    """
    cookiejar_filename = './cookies.txt'
    cookiejar = YoutubeDLCookieJar(cookiejar_filename)
    cookiejar.load(ignore_discard=True, ignore_expires=True)

    # Walk the jar through its public iterator instead of reaching into the
    # private ``_cookies`` mapping; the domain/path/name filter is the same.
    auth_cookie = next(
        (
            cookie.value
            for cookie in cookiejar
            if cookie.domain == '.www.linkedin.com'
            and cookie.path == '/'
            and cookie.name == 'li_at'
        ),
        None,
    )
    if auth_cookie is None:
        # A missing cookie is a failure: report it and exit non-zero rather
        # than terminating silently with status 0.
        sys.exit('[!] No "li_at" cookie for .www.linkedin.com found in %s'
                 % cookiejar_filename)

    # NOTE(review): the jar is written once to a temporary file, as in the
    # original code; nothing reads that file afterwards. It contains the
    # authentication cookies, so it is deleted again instead of being left
    # behind in the temp directory.
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        cookiejar.save(filename=temp_file.name, ignore_discard=True, ignore_expires=True)
    finally:
        temp_file.close()
        os.remove(temp_file.name)
    return auth_cookie
def load_page(opener, url, data=None):
    """Fetch ``url`` through ``opener`` and return the response body as text.

    Args:
        opener: object exposing ``open(url[, data])`` that returns a
            file-like response (e.g. a ``urllib.request.OpenerDirector``).
        url: address to request.
        data: optional request body; when given it is passed to
            ``opener.open`` on every attempt.

    Returns:
        The response body as ``str``. Byte responses are decoded as UTF-8
        with undecodable bytes replaced.

    Raises:
        SystemExit: with status 1 when both the first attempt and the single
            retry fail with an ``OSError`` (which includes
            ``urllib.error.URLError`` / ``HTTPError``).
    """
    def _fetch():
        # Only pass ``data`` when the caller supplied it, as before.
        response = opener.open(url) if data is None else opener.open(url, data)
        body = response.read()
        # urllib responses yield bytes; joining them with '' raised TypeError
        # on Python 3, so decode explicitly instead.
        if isinstance(body, bytes):
            return body.decode('utf-8', errors='replace')
        return body

    try:
        # A successful first attempt is used directly rather than discarded
        # and requested a second time.
        return _fetch()
    except OSError:
        print('[!] Rate limited')

    try:
        return _fetch()
    except OSError:
        print('[Notice] Exception hit')
        sys.exit(1)
def download_file(url, file_path, file_name):
    """Stream the resource at ``url`` into ``file_path/file_name``.

    Args:
        url: address to download with ``requests.get(..., stream=True)``.
        file_path: destination directory; created (including parents) when
            it does not exist. An empty string means the current directory.
        file_name: name of the file to write inside ``file_path``.

    The body is written in 1024-byte chunks so the whole file is never held
    in memory. The HTTP status is not checked here, so an error response
    body would be written to disk as well.
    """
    reply = requests.get(url, stream=True)
    try:
        if file_path:
            # exist_ok avoids the check-then-create race of exists()/makedirs().
            os.makedirs(file_path, exist_ok=True)
        with open(os.path.join(file_path, file_name), 'wb') as f:
            for chunk in reply.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
    finally:
        # A streamed response keeps its connection until it is closed.
        reply.close()
def _safe_name(name):
    """Return ``name`` with the characters ``\\ / * ? : " < > |`` removed.

    The result is used as a directory or file name component.
    """
    return re.sub(r'[\\/*?:"<>|]', "", name)


def main():
    """Download every video of every course listed in ``config.COURSES``.

    Obtains the session cookie via ``login()``, queries the LinkedIn
    learning API for each course slug, and saves each video that exposes a
    ``progressiveUrl`` to ``out/<course>/<chapter>/<n>. <title>.mp4``.

    Raises:
        SystemExit: when no authenticated session can be obtained.
    """
    try:
        session = login()
        if not session:
            sys.exit('[!] Unable to obtain a valid authenticated session')
        # NOTE(review): this prints the raw session cookie to stdout.
        print('[*] Successfully obtained valid authenticated session: %s' % session)
        cookies = dict(li_at=session)
    except Exception as e:
        # ``except Exception(e)`` is not valid handler syntax and raised
        # NameError instead of reporting the actual error.
        sys.exit('[!] Error: %s' % e)

    # The same token is sent as the Csrf-Token header and the JSESSIONID cookie.
    csrf_token = 'ajax:4332914976342601831'
    headers = {'Csrf-Token': csrf_token}
    cookies['JSESSIONID'] = csrf_token

    for course in config.COURSES:
        print('')
        # NOTE(review): the doubled '?' is reproduced verbatim from the
        # original URL; confirm against the API before "fixing" it.
        course_url = 'https://www.linkedin.com/learning-api/detailedCourses' \
                     '??fields=videos&addParagraphsToTranscript=true&courseSlug={0}&q=slugs'.format(course)
        r = requests.get(course_url, cookies=cookies, headers=headers)
        # Parse the JSON payload once and reuse it for title and chapters.
        course_info = r.json()['elements'][0]
        course_name = _safe_name(course_info['title'])
        chapters = course_info['chapters']
        print('[*] Parsing "%s" course\'s chapters' % course_name)
        print('[*] [%d chapters found]' % len(chapters))

        for chapter in chapters:
            chapter_name = _safe_name(chapter['title'])
            videos = chapter['videos']
            print('[*] --- Parsing "%s" chapters\'s videos' % chapter_name)
            print('[*] --- [%d videos found]' % len(videos))

            # The counter covers every video, downloadable or not, so file
            # numbers match the position within the chapter.
            for vc, video in enumerate(videos, start=1):
                video_name = _safe_name(video['title'])
                video_slug = video['slug']
                video_url = 'https://www.linkedin.com/learning-api/detailedCourses' \
                            '?addParagraphsToTranscript=false&courseSlug={0}&q=slugs&resolution=_720&videoSlug={1}'\
                    .format(course, video_slug)
                r = requests.get(video_url, cookies=cookies, headers=headers)

                match = re.search('"progressiveUrl":"(.+)","streamingUrl"', r.text)
                if match is None:
                    print('[!] ------ Can\'t download the video "%s", probably is only for premium users' % video_name)
                else:
                    download_url = match.group(1)
                    print('[*] ------ Downloading video "%s"' % video_name)
                    download_file(download_url, 'out/%s/%s' % (course_name, chapter_name), '%s. %s.mp4' % (str(vc), video_name))

                # NOTE(review): the original indentation was lost, so the
                # intended nesting of this pause is an assumption; it is
                # applied once per video request here. TODO confirm.
                time.sleep(10)


if __name__ == '__main__':
    main()