From 77a142bd047771187c8da978bab5dd0456aae7e3 Mon Sep 17 00:00:00 2001 From: Monsterovich Date: Sun, 14 Feb 2021 21:24:44 +0200 Subject: [PATCH 1/8] Fixed Google Search HTML parsing to extract links --- .../com/shkmishra/lyrically/LyricsService.kt | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt b/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt index e0e662b..5bc661f 100644 --- a/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt +++ b/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt @@ -520,6 +520,24 @@ class LyricsService : Service() { } + private fun fetchGoogleSearchResult(url : String) : String { + println("Fetching url: " + url) + + var document = Jsoup.connect(url).userAgent("Mozilla/5.0").timeout(10000).get() + var linkContainers = document.getElementsByTag("a") + + for (container in linkContainers) { + if (container.attr("href").substring(0, 7) == "/url?q=") { + val result = container.attr("href").substring(7, container.attr("href").indexOf("&")) // grabbing the first result + println("The first result is: " + result) + + return result + } + } + + return "" + } + // fetches the lyrics from the Internet private fun fetchLyricsAsync() { /* @@ -535,18 +553,13 @@ class LyricsService : Service() { title = "$artist - $track" var url = "https://www.google.com/search?q=" + URLEncoder.encode("lyrics+azlyrics+$artistU+$trackU", "UTF-8") // Google URL - var document = Jsoup.connect(url).userAgent("Mozilla/5.0").timeout(10000).get() - var results = document.select("h3.r > a").first() + var lyricURL = fetchGoogleSearchResult(url) - var lyricURL = results.attr("href").substring(7, results.attr("href").indexOf("&")) // grabbing the first result val element: Element var temp: String - println(url) - println(lyricURL) - if (lyricURL.contains("azlyrics.com/lyrics")) { // checking if from the provider we wanted - document = 
Jsoup.connect(lyricURL).userAgent(USER_AGENT).get() + var document = Jsoup.connect(lyricURL).userAgent(USER_AGENT).get() var page = document.toString() page = page.substring(page.indexOf("that. -->") + 9) @@ -555,16 +568,11 @@ class LyricsService : Service() { } else { url = "https://www.google.com/search?q=" + URLEncoder.encode("genius+" + artistU + "+" + trackU + "lyrics", "UTF-8") - document = Jsoup.connect(url).userAgent("Mozilla/5.0").timeout(10000).get() - - results = document.select("h3.r > a").first() - lyricURL = results.attr("href").substring(7, results.attr("href").indexOf("&")) - println(url) - println(lyricURL) + lyricURL = fetchGoogleSearchResult(url) if (lyricURL.contains("genius")) { - document = Jsoup.connect(lyricURL).userAgent(USER_AGENT).get() + var document = Jsoup.connect(lyricURL).userAgent(USER_AGENT).get() val selector = document.select("div.h2") for (e in selector) { @@ -576,14 +584,9 @@ class LyricsService : Service() { } else { url = "https://www.google.com/search?q=" + URLEncoder.encode("lyrics.wikia+$trackU+$artistU", "UTF-8") - document = Jsoup.connect(url).userAgent("Mozilla/5.0").timeout(10000).get() - - results = document.select("h3.r > a").first() - lyricURL = results.attr("href").substring(7, results.attr("href").indexOf("&")) - println(url) - println(lyricURL) + lyricURL = fetchGoogleSearchResult(url) - document = Jsoup.connect(lyricURL).userAgent(USER_AGENT).get() + var document = Jsoup.connect(lyricURL).userAgent(USER_AGENT).get() element = document.select("div[class=lyricbox]").first() temp = element.toString() From 81474d152cf0d74338b37b291cd26f31d3d5b879 Mon Sep 17 00:00:00 2001 From: Monsterovich Date: Mon, 15 Feb 2021 01:25:10 +0200 Subject: [PATCH 2/8] Ignore changes inside .idea folder --- .gitignore | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index c6cbe56..b339598 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,7 @@ *.iml .gradle /local.properties 
-/.idea/workspace.xml -/.idea/libraries +/.idea/* .DS_Store /build /captures From 692740356e43615eb82559ebd4e07233645bdad9 Mon Sep 17 00:00:00 2001 From: Monsterovich Date: Mon, 15 Feb 2021 02:02:00 +0200 Subject: [PATCH 3/8] Code refactoring --- .../java/com/shkmishra/lyrically/LyricsService.kt | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt b/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt index 5bc661f..2d2703b 100644 --- a/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt +++ b/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt @@ -520,7 +520,9 @@ class LyricsService : Service() { } - private fun fetchGoogleSearchResult(url : String) : String { + // fetches the first result from Google Search + private fun fetchGoogleSearchResult(request : String) : String { + val url = "https://www.google.com/search?q=" + URLEncoder.encode(request, "UTF-8") println("Fetching url: " + url) var document = Jsoup.connect(url).userAgent("Mozilla/5.0").timeout(10000).get() @@ -552,8 +554,7 @@ class LyricsService : Service() { try { title = "$artist - $track" - var url = "https://www.google.com/search?q=" + URLEncoder.encode("lyrics+azlyrics+$artistU+$trackU", "UTF-8") // Google URL - var lyricURL = fetchGoogleSearchResult(url) + var lyricURL = fetchGoogleSearchResult("lyrics+azlyrics+$artistU+$trackU") val element: Element var temp: String @@ -567,8 +568,7 @@ class LyricsService : Service() { temp = page } else { - url = "https://www.google.com/search?q=" + URLEncoder.encode("genius+" + artistU + "+" + trackU + "lyrics", "UTF-8") - lyricURL = fetchGoogleSearchResult(url) + lyricURL = fetchGoogleSearchResult("genius+" + artistU + "+" + trackU + "lyrics") if (lyricURL.contains("genius")) { @@ -583,8 +583,7 @@ class LyricsService : Service() { temp = element.toString().substring(0, element.toString().indexOf("")) } else { - url = "https://www.google.com/search?q=" + 
URLEncoder.encode("lyrics.wikia+$trackU+$artistU", "UTF-8") - lyricURL = fetchGoogleSearchResult(url) + lyricURL = fetchGoogleSearchResult("lyrics.wikia+$trackU+$artistU") var document = Jsoup.connect(lyricURL).userAgent(USER_AGENT).get() element = document.select("div[class=lyricbox]").first() From 3ae3e7dbe2e63353dfb8af3fcce16cda300546c6 Mon Sep 17 00:00:00 2001 From: Monsterovich Date: Mon, 15 Feb 2021 03:21:13 +0200 Subject: [PATCH 4/8] Fixed retrieving lyrics from genius.com, added retrieving lyrics from www.songlyrics.com --- .../com/shkmishra/lyrically/LyricsService.kt | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt b/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt index 2d2703b..17a3b9e 100644 --- a/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt +++ b/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt @@ -554,7 +554,7 @@ class LyricsService : Service() { try { title = "$artist - $track" - var lyricURL = fetchGoogleSearchResult("lyrics+azlyrics+$artistU+$trackU") + var lyricURL = fetchGoogleSearchResult("azlyrics.com+$artistU+$trackU") val element: Element var temp: String @@ -568,27 +568,29 @@ class LyricsService : Service() { temp = page } else { - lyricURL = fetchGoogleSearchResult("genius+" + artistU + "+" + trackU + "lyrics") + lyricURL = fetchGoogleSearchResult("genius.com+$artistU+$trackU") - if (lyricURL.contains("genius")) { + if (lyricURL.contains("genius.com")) { - var document = Jsoup.connect(lyricURL).userAgent(USER_AGENT).get() + var document = Jsoup.connect(lyricURL).userAgent("Mozilla/5.0").timeout(10000).get() // USER_AGENT doesn't work, returns code 503 + val elements = document.select("div[class^=\"Lyrics__Container\"]") - val selector = document.select("div.h2") - for (e in selector) { - e.remove() + if (elements.size > 0) { + temp = elements.first().toString() + } else { + temp = elements.toString() } - - 
element = document.select("div[class=song_body-lyrics]").first() - temp = element.toString().substring(0, element.toString().indexOf("")) } else { - lyricURL = fetchGoogleSearchResult("lyrics.wikia+$trackU+$artistU") - - var document = Jsoup.connect(lyricURL).userAgent(USER_AGENT).get() - element = document.select("div[class=lyricbox]").first() - temp = element.toString() + lyricURL = fetchGoogleSearchResult("www.songlyrics.com+$trackU+$artistU") + if (lyricURL.contains("www.songlyrics.com")) { + var document = Jsoup.connect(lyricURL).userAgent(USER_AGENT).get() + element = document.select("div[id=songLyricsDiv-outer]").first() + temp = element.toString() + } else { + temp = "" + } } } @@ -601,8 +603,6 @@ class LyricsService : Service() { lyrics = lyrics.replace("br2n".toRegex(), "\n") lyrics = lyrics.replace("]shk".toRegex(), "]\n") lyrics = lyrics.replace("shk\\[".toRegex(), "\n [") - if (lyricURL.contains("genius")) - lyrics = lyrics.substring(lyrics.indexOf("Lyrics") + 6) } catch (e: Exception) { e.printStackTrace() return@async null From d0bc0807088a3c3353038ed2a0c9275c84fce2af Mon Sep 17 00:00:00 2001 From: Monsterovich Date: Mon, 15 Feb 2021 03:31:38 +0200 Subject: [PATCH 5/8] Removed .idea folder --- .idea/.name | 1 - .idea/compiler.xml | 22 ------------- .idea/copyright/profiles_settings.xml | 3 -- .idea/encodings.xml | 6 ---- .idea/gradle.xml | 24 -------------- .idea/misc.xml | 46 --------------------------- .idea/modules.xml | 9 ------ .idea/runConfigurations.xml | 12 ------- .idea/vcs.xml | 6 ---- 9 files changed, 129 deletions(-) delete mode 100644 .idea/.name delete mode 100644 .idea/compiler.xml delete mode 100644 .idea/copyright/profiles_settings.xml delete mode 100644 .idea/encodings.xml delete mode 100644 .idea/gradle.xml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/runConfigurations.xml delete mode 100644 .idea/vcs.xml diff --git a/.idea/.name b/.idea/.name deleted file mode 100644 index 
9ce3dc9..0000000 --- a/.idea/.name +++ /dev/null @@ -1 +0,0 @@ -Lyrically \ No newline at end of file diff --git a/.idea/compiler.xml b/.idea/compiler.xml deleted file mode 100644 index 96cc43e..0000000 --- a/.idea/compiler.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml deleted file mode 100644 index e7bedf3..0000000 --- a/.idea/copyright/profiles_settings.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml deleted file mode 100644 index 97626ba..0000000 --- a/.idea/encodings.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/gradle.xml b/.idea/gradle.xml deleted file mode 100644 index 0bd7bc6..0000000 --- a/.idea/gradle.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index 5d19981..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,46 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index f4214e9..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/.idea/runConfigurations.xml b/.idea/runConfigurations.xml deleted file mode 100644 index 7f68460..0000000 --- a/.idea/runConfigurations.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 94a25f7..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file From afe66d31da04ab8cc5d8ae889e2197a84d236fec Mon Sep 17 00:00:00 2001 From: Monsterovich Date: Mon, 15 Feb 2021 12:19:26 +0200 Subject: [PATCH 6/8] Throw LyricsNotFoundException if lyrics are not found, 
print correct track title --- .../com/shkmishra/lyrically/LyricsService.kt | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt b/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt index 17a3b9e..99558c9 100644 --- a/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt +++ b/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt @@ -31,6 +31,8 @@ import java.io.* import java.net.URLEncoder import java.util.* +class LyricsNotFoundException(message: String): Exception(message) + class LyricsService : Service() { @@ -313,7 +315,12 @@ class LyricsService : Service() { artist = intent.getStringExtra("artist") track = intent.getStringExtra("track") - title = "$artist - $track" + // artist entry may be empty + if (artist.isEmpty()) { + title = track + } else { + title = "$artist - $track" + } titleTV.text = title @@ -552,7 +559,12 @@ class LyricsService : Service() { progressBar.visibility = View.VISIBLE val result = async(context = CommonPool, parent = asyncJob) { try { - title = "$artist - $track" + // artist entry may be empty + if (artist.isEmpty()) { + title = track + } else { + title = "$artist - $track" + } var lyricURL = fetchGoogleSearchResult("azlyrics.com+$artistU+$trackU") @@ -594,6 +606,11 @@ class LyricsService : Service() { } } + // lyrics are not found or track is instrumental + if (temp.isEmpty()) { + throw LyricsNotFoundException("Lyrics are not found for track: " + title) + } + // preserving line breaks temp = temp.replace("(?i)]*>".toRegex(), "br2n") temp = temp.replace("]".toRegex(), "]shk") From 4e9080a6600a82b43bdc5df8aee4d89187c33672 Mon Sep 17 00:00:00 2001 From: Monsterovich Date: Mon, 15 Feb 2021 14:45:47 +0200 Subject: [PATCH 7/8] Code refactoring --- .../com/shkmishra/lyrically/LyricsService.kt | 31 ++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git 
a/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt b/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt index 99558c9..7fd8f9b 100644 --- a/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt +++ b/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt @@ -527,10 +527,27 @@ class LyricsService : Service() { } + // converts string array to url request + private fun stringArrayToRequest(stringArray : Array) : String { + var request = "" + + for (string in stringArray) { + if (!string.isEmpty()) { + if (stringArray.first() != string) { + request += "+" + } + + request += string + } + } + + return request + } + // fetches the first result from Google Search - private fun fetchGoogleSearchResult(request : String) : String { - val url = "https://www.google.com/search?q=" + URLEncoder.encode(request, "UTF-8") - println("Fetching url: " + url) + private fun fetchGoogleSearchResult(keywords : Array) : String { + val url = "https://www.google.com/search?q=" + URLEncoder.encode(stringArrayToRequest(keywords), "UTF-8") + println("Searching for keywords in Google Search: " + keywords.joinToString()) var document = Jsoup.connect(url).userAgent("Mozilla/5.0").timeout(10000).get() var linkContainers = document.getElementsByTag("a") @@ -550,7 +567,7 @@ class LyricsService : Service() { // fetches the lyrics from the Internet private fun fetchLyricsAsync() { /* - Currently using 3 providers : azlyrics, genius and lyrics.wikia; in that order + Currently using 3 providers : azlyrics, genius and songlyrics; in that order Procedure : - Google the artist + song name + provider name - Grab the first result and if it is from the provider we wanted, get the lyrics @@ -566,7 +583,7 @@ class LyricsService : Service() { title = "$artist - $track" } - var lyricURL = fetchGoogleSearchResult("azlyrics.com+$artistU+$trackU") + var lyricURL = fetchGoogleSearchResult(arrayOf("azlyrics.com", artistU, trackU)) val element: Element var temp: String @@ -580,7 +597,7 @@ class 
LyricsService : Service() { temp = page } else { - lyricURL = fetchGoogleSearchResult("genius.com+$artistU+$trackU") + lyricURL = fetchGoogleSearchResult(arrayOf("genius.com", artistU, trackU)) if (lyricURL.contains("genius.com")) { @@ -594,7 +611,7 @@ class LyricsService : Service() { } } else { - lyricURL = fetchGoogleSearchResult("www.songlyrics.com+$trackU+$artistU") + lyricURL = fetchGoogleSearchResult(arrayOf("www.songlyrics.com", artistU, trackU)) if (lyricURL.contains("www.songlyrics.com")) { var document = Jsoup.connect(lyricURL).userAgent(USER_AGENT).get() From 95f99ca42b6ea936eaf9d083853bcf2b2c965d7d Mon Sep 17 00:00:00 2001 From: Monsterovich Date: Tue, 16 Feb 2021 18:38:55 +0200 Subject: [PATCH 8/8] Code refactoring --- .../com/shkmishra/lyrically/LyricsService.kt | 97 +++++++++++-------- 1 file changed, 58 insertions(+), 39 deletions(-) diff --git a/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt b/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt index 7fd8f9b..a7ed65d 100644 --- a/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt +++ b/app/src/main/java/com/shkmishra/lyrically/LyricsService.kt @@ -549,8 +549,8 @@ class LyricsService : Service() { val url = "https://www.google.com/search?q=" + URLEncoder.encode(stringArrayToRequest(keywords), "UTF-8") println("Searching for keywords in Google Search: " + keywords.joinToString()) - var document = Jsoup.connect(url).userAgent("Mozilla/5.0").timeout(10000).get() - var linkContainers = document.getElementsByTag("a") + val document = Jsoup.connect(url).userAgent("Mozilla/5.0").timeout(10000).get() + val linkContainers = document.getElementsByTag("a") for (container in linkContainers) { if (container.attr("href").substring(0, 7) == "/url?q=") { @@ -564,6 +564,50 @@ class LyricsService : Service() { return "" } + private fun fetchLyricsFromAZLyrics() : String { + val lyricURL = fetchGoogleSearchResult(arrayOf("azlyrics.com", artistU, trackU)) + + if 
(lyricURL.contains("azlyrics.com/lyrics")) { + val document = Jsoup.connect(lyricURL).userAgent(USER_AGENT).get() + var page = document.toString() + + page = page.substring(page.indexOf("that. -->") + 9) + page = page.substring(0, page.indexOf("")) + + return page + } + + return "" + } + + private fun fetchLyricsFromGenius() : String { + val lyricURL = fetchGoogleSearchResult(arrayOf("genius.com", artistU, trackU)) + + if (lyricURL.contains("genius.com")) { + val document = Jsoup.connect(lyricURL).userAgent("Mozilla/5.0").timeout(10000).get() // USER_AGENT doesn't work, returns code 503 + val elements = document.select("div[class^=\"Lyrics__Container\"]") + + if (elements.size > 0) { + return elements.first().toString() + } else { + return elements.toString() + } + } + + return "" + } + + private fun fetchLyricsFromSonglyrics() : String { + val lyricURL = fetchGoogleSearchResult(arrayOf("www.songlyrics.com", artistU, trackU)) + + if (lyricURL.contains("www.songlyrics.com")) { + val document = Jsoup.connect(lyricURL).userAgent(USER_AGENT).get() + return document.select("div[id=songLyricsDiv-outer]").first().toString() + } + + return "" + } + // fetches the lyrics from the Internet private fun fetchLyricsAsync() { /* @@ -583,43 +627,18 @@ class LyricsService : Service() { title = "$artist - $track" } - var lyricURL = fetchGoogleSearchResult(arrayOf("azlyrics.com", artistU, trackU)) - - val element: Element - var temp: String - - if (lyricURL.contains("azlyrics.com/lyrics")) { // checking if from the provider we wanted - var document = Jsoup.connect(lyricURL).userAgent(USER_AGENT).get() - var page = document.toString() - - page = page.substring(page.indexOf("that. 
-->") + 9) - page = page.substring(0, page.indexOf("")) - temp = page - } else { - - lyricURL = fetchGoogleSearchResult(arrayOf("genius.com", artistU, trackU)) - - if (lyricURL.contains("genius.com")) { - - var document = Jsoup.connect(lyricURL).userAgent("Mozilla/5.0").timeout(10000).get() // USER_AGENT doesn't work, returns code 503 - val elements = document.select("div[class^=\"Lyrics__Container\"]") - - if (elements.size > 0) { - temp = elements.first().toString() - } else { - temp = elements.toString() - } - } else { - - lyricURL = fetchGoogleSearchResult(arrayOf("www.songlyrics.com", artistU, trackU)) - - if (lyricURL.contains("www.songlyrics.com")) { - var document = Jsoup.connect(lyricURL).userAgent(USER_AGENT).get() - element = document.select("div[id=songLyricsDiv-outer]").first() - temp = element.toString() - } else { - temp = "" - } + var temp = "" + val fetchFunctions = arrayOf( + ::fetchLyricsFromAZLyrics, + ::fetchLyricsFromGenius, + ::fetchLyricsFromSonglyrics) + + for (function in fetchFunctions) { + val result = function() + + if (!result.isEmpty()) { + temp = result + break } }