diff --git a/app/src/main/java/net/vrgsoft/rxurlparser/MainActivity.kt b/app/src/main/java/net/vrgsoft/rxurlparser/MainActivity.kt index 5bd5250..3152b97 100644 --- a/app/src/main/java/net/vrgsoft/rxurlparser/MainActivity.kt +++ b/app/src/main/java/net/vrgsoft/rxurlparser/MainActivity.kt @@ -13,7 +13,8 @@ class MainActivity : AppCompatActivity() { private lateinit var mBinding: ActivityMainBinding private val crawler = LinkCrawler() - private var parseSubscription: Disposable? = null + private var githubSubscription: Disposable? = null + private var youtubeSubscription: Disposable? = null override fun onCreate(savedInstanceState: Bundle?) { super.onCreate(savedInstanceState) @@ -24,15 +25,22 @@ class MainActivity : AppCompatActivity() { .show() } - parseSubscription = + githubSubscription = crawler.parseUrl("https://github.com") .subscribe { t -> - mBinding.content = t.result + mBinding.contentGithub = t.result + } + + youtubeSubscription = + crawler.parseUrl("https://youtu.be/X1RVYt2QKQE") + .subscribe { t -> + mBinding.contentYoutube = t.result } } override fun onPause() { - parseSubscription?.dispose() + githubSubscription?.dispose() + youtubeSubscription?.dispose() super.onPause() } } diff --git a/app/src/main/res/layout/activity_main.xml b/app/src/main/res/layout/activity_main.xml index 95fb1cb..8f65cfb 100644 --- a/app/src/main/res/layout/activity_main.xml +++ b/app/src/main/res/layout/activity_main.xml @@ -6,7 +6,11 @@ + + @@ -16,45 +20,81 @@ tools:context="net.vrgsoft.rxurlparser.MainActivity"> + + + + + + \ No newline at end of file diff --git a/library/src/main/java/net/vrgsoft/library/LinkCrawler.kt b/library/src/main/java/net/vrgsoft/library/LinkCrawler.kt index 63b3a57..72c357b 100644 --- a/library/src/main/java/net/vrgsoft/library/LinkCrawler.kt +++ b/library/src/main/java/net/vrgsoft/library/LinkCrawler.kt @@ -1,17 +1,13 @@ package net.vrgsoft.library -import io.reactivex.* +import io.reactivex.Flowable +import io.reactivex.Single +import io.reactivex.processors.PublishProcessor import io.reactivex.schedulers.Schedulers import org.jsoup.Jsoup import org.jsoup.nodes.Document import org.jsoup.nodes.Element import org.jsoup.select.Elements -import java.io.IOException -import java.net.MalformedURLException -import java.net.URL -import java.net.URLConnection -import io.reactivex.Flowable -import io.reactivex.processors.PublishProcessor typealias LinkPreloadCallback = (String) -> Unit @@ -67,24 +63,25 @@ class LinkCrawler { return Single.fromCallable { val urls: List = SearchUrls.matches(url) when { - urls.isNotEmpty() -> content.finalUrl = unshortenUrl(extendedTrim(urls[0])) - else -> content.finalUrl = "" + urls.isNotEmpty() -> content.url = extendedTrim(urls[0]) + else -> content.url = "" } - if (content.finalUrl != "") { + if (content.url != "") { when { - isImage(content.finalUrl) && !content.finalUrl.contains("dropbox") -> { + isImage(content.url) && !content.url.contains("dropbox") -> { content.success = true - content.images.add(content.finalUrl) + content.images.add(content.url) content.title = "" content.description = "" } else -> try { - val doc: Document = Jsoup.connect(content.finalUrl).userAgent("Mozzila").get() + val doc: Document = Jsoup.connect(content.url).userAgent("Mozzila").get() content.htmlCode = extendedTrim(doc.toString()) val metaTags: Map = getMetaTags(content.htmlCode) content.metaTags = metaTags - content.title = metaTags["title"]!! - content.description = metaTags["description"]!! + content.title = metaTags["title"] ?: "" + content.description = metaTags["description"] ?: "" + content.finalUrl = metaTags["url"] ?: "" when { content.title == "" -> { @@ -109,9 +106,9 @@ class LinkCrawler { } } } - val linksSet = content.finalUrl.split("&") - content.url = linksSet[0] - content.canonicalUrl = canonicalPage(content.finalUrl) + val linksSet = content.url.split("&") + content.finalUrl = linksSet[0] + content.canonicalUrl = canonicalPage(content.url) content.description = trimTags(content.description) //return content content @@ -247,41 +244,6 @@ class LinkCrawler { return matches } - private fun unshortenUrl(url: String): String { - if (!url.startsWith(HTTP_PROTOCOL) && !url.startsWith(HTTPS_PROTOCOL)) { - return "" - } - - var urlConn = connectURL(url) - urlConn?.headerFields - - var finalResult = urlConn?.url.toString() - - urlConn = connectURL(finalResult) - urlConn?.headerFields - - while (urlConn?.url.toString() != finalResult) { - finalResult = unshortenUrl(finalResult) - } - - return finalResult - - } - - private fun connectURL(strURL: String): URLConnection? { - var conn: URLConnection? = null - try { - val inputURL = URL(strURL) - conn = inputURL.openConnection() - } catch (e: MalformedURLException) { - println("Please input a valid URL") - } catch (ioe: IOException) { - println("Can not connect to the URL") - } - - return conn - } - private fun htmlDecode(content: String): String = Jsoup.parse(content).text() private fun trimTags(content: String): String = Jsoup.parse(content).text()