-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathlyrics_extractor copy.py
116 lines (91 loc) · 4.07 KB
/
lyrics_extractor copy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import requests
from bs4 import BeautifulSoup
class Song_Lyrics():
    """
    Fetch a song's title and lyrics via a Google Custom Search engine.

    The instance stores a Google Custom Search API key and engine ID; both
    are sent with every search issued by get_lyrics.
    """

    # Site-restricted Custom Search JSON API endpoint queried by get_lyrics.
    _SEARCH_URL = "https://www.googleapis.com/customsearch/v1/siterestrict"

    # Seconds allowed for each HTTP request, so that an unresponsive server
    # cannot block the caller indefinitely.
    _REQUEST_TIMEOUT = 10

    # Marker appended after each paragraph by the paragraph-based extractors.
    # It is turned into a blank line by _normalise_breaks, and it survives
    # the '\n\n' -> '\n' collapsing that runs first.
    _PARAGRAPH_BREAK = '<br><br>'

    def __init__(self, GCS_API_KEY, GCS_ENGINE_ID):
        """Store the Custom Search API key and engine ID for later searches."""
        self.GCS_API_KEY = GCS_API_KEY
        self.GCS_ENGINE_ID = GCS_ENGINE_ID

    def get_lyrics(self, song_name):
        """
        Search for "<song_name> lyrics" and scrape the first result.

        The first search hit is used as-is; this method performs no spelling
        correction of its own. The hit's page is downloaded and parsed with
        the extractor matching the site found in its URL.

        Returns a (title, lyrics) tuple. If the search has no results, the
        result's site is not supported, or scraping fails, the exception is
        printed and ("Err: No lyrics found for <song_name>", '') is returned.

        Errors raised by the search request itself (network failure, timeout,
        non-JSON response) are not caught and propagate to the caller.
        """
        # Let requests build the query string so that characters such as
        # '&', '#' or '+' in the song name are escaped instead of corrupting
        # the URL.
        page = requests.get(
            self._SEARCH_URL,
            params=self._search_params(song_name),
            timeout=self._REQUEST_TIMEOUT,
        )
        data = page.json()
        try:
            # Only the first search result is considered.
            first_hit = data["items"][0]
            link = first_hit["link"]
            title = first_hit["title"]

            page = requests.get(link, timeout=self._REQUEST_TIMEOUT)
            soup = BeautifulSoup(page.content, 'lxml')

            title, lyrics = self._extract(soup, link, title)
            lyrics = self._normalise_breaks(lyrics)
        except Exception as e:
            # Deliberate best-effort boundary: any failure while locating or
            # scraping the lyrics is reported through the return value.
            print("Exception Occured : ", e)
            title = "Err: No lyrics found for " + song_name
            lyrics = ''
        return title, lyrics

    def _search_params(self, song_name):
        """Return the query parameters for the Custom Search request."""
        return {
            "key": self.GCS_API_KEY,
            "cx": self.GCS_ENGINE_ID,
            "q": song_name + " lyrics",
        }

    def _extract(self, soup, link, title):
        """
        Pick the site-specific extractor from the result URL.

        Returns (title, raw_lyrics). The fixed-length title slices presumably
        drop each site's title suffix -- confirm against live result titles.
        Raises ValueError when the URL matches none of the supported sites.
        """
        # Method 1 Genius
        if 'genius' in link:
            return title[:-16], self._genius_lyrics(soup)
        # Method 2 Glamsham
        if 'glamsham' in link:
            return title[:-14].strip(), self._glamsham_lyrics(soup)
        # Method 3 LyricsBell
        if 'lyricsbell' in link:
            lyrics = self._paragraph_lyrics(soup, ".lyrics-col p", "", False)
            return title[:-13], lyrics
        # Method 4 LyricsTed
        if 'lyricsted' in link:
            lyrics = self._paragraph_lyrics(soup, ".lyric-content p", "", True)
            return title, lyrics
        # Method 5 LyricsOff
        if 'lyricsoff' in link:
            lyrics = self._paragraph_lyrics(soup, "#main_lyrics p", "\n", True)
            return title, lyrics
        # Method 6 LyricsMint
        if 'lyricsmint' in link:
            lyrics = self._paragraph_lyrics(soup, "#lyrics p", "\n", True)
            return title, lyrics
        raise ValueError("Unsupported lyrics site: " + link)

    @staticmethod
    def _br_to_newline(node):
        """Replace every <br> tag under node with a newline, in place."""
        for br in node.find_all('br'):
            br.replace_with("\n")

    @classmethod
    def _genius_lyrics(cls, soup):
        """Join the non-empty Genius lyric containers with blank lines."""
        sections = []
        # NOTE(review): this class name is copied verbatim from the page
        # markup and looks build-specific; verify it still matches.
        containers = soup.find_all(
            'div', class_="Lyrics__Container-sc-1ynbvzw-2")
        for container in containers:
            cls._br_to_newline(container)
            text = container.get_text().strip()
            if text:
                sections.append(text)
        return "\n\n".join(sections)

    @classmethod
    def _glamsham_lyrics(cls, soup):
        """Return the text of the sixth <font class="general"> element."""
        # An IndexError here (fewer than six matches) is handled by the
        # caller's best-effort exception handler.
        block = soup.find_all('font', class_='general')[5]
        cls._br_to_newline(block)
        return block.get_text()

    @classmethod
    def _paragraph_lyrics(cls, soup, selector, separator, strip):
        """
        Concatenate the text of every element matching selector.

        separator is passed to get_text; strip controls whether each
        paragraph is stripped. Each paragraph is followed by the paragraph
        break marker.
        """
        parts = []
        for paragraph in soup.select(selector):
            text = paragraph.get_text(separator=separator)
            if strip:
                text = text.strip()
            parts.append(text + cls._PARAGRAPH_BREAK)
        return ''.join(parts)

    @staticmethod
    def _normalise_breaks(lyrics):
        """
        Collapse doubled newlines, then turn '<br>' markers into newlines.

        The order matters: paragraph break markers are expanded only after
        the collapsing step, so they still yield a blank line.
        """
        return lyrics.replace('\n\n', '\n').replace('<br>', '\n')