Blerb.

fake-name · fake-name · commit 99e7975b6d51 · 2022-05-30T20:22:21.000-07:00
diff --git a/WebMirror/UrlUpserter.py b/WebMirror/UrlUpserter.py
@@ -222,24 +222,24 @@ def do_link_batch_update_sess(logger, interface, link_batch, max_pri=None, show_
 
 	for item in link_batch:
 		try:
-			assert 'url'              in item
-			assert 'starturl'         in item
-			assert 'netloc'           in item
-			assert 'distance'         in item
-			assert 'is_text'          in item
-			assert 'priority'         in item
-			assert 'type'             in item
-			assert 'addtime'          in item
-			assert 'state'            in item
-			assert 'epoch'            in item
+			assert 'url'              in item, "Missing key 'url'"
+			assert 'starturl'         in item, "Missing key 'starturl'"
+			assert 'netloc'           in item, "Missing key 'netloc'"
+			assert 'distance'         in item, "Missing key 'distance'"
+			assert 'is_text'          in item, "Missing key 'is_text'"
+			assert 'priority'         in item, "Missing key 'priority'"
+			assert 'type'             in item, "Missing key 'type'"
+			assert 'addtime'          in item, "Missing key 'addtime'"
+			assert 'state'            in item, "Missing key 'state'"
+			assert 'epoch'            in item, "Missing key 'epoch'"
 
 			if not 'maximum_priority' in item:
 				item['maximum_priority'] = item['priority']
 
 			if item['distance'] < item['maximum_priority']:
 				item['distance'] = item['maximum_priority']
 
-			assert 'maximum_priority' in item
+			assert 'maximum_priority' in item, "Missing key 'maximum_priority'"
 
 			# psycopg2cffi._impl.exceptions.OperationalError: index row size 3192 exceeds maximum 2712 for index "ix_web_pages_url"
 			assert len(item['url']) < 2712, "URL Too long for postgres. Length %s for url '%s'" % (
diff --git a/WebMirror/management/rss_parser_funcs/feed_parse_extractChococatsId.py b/WebMirror/management/rss_parser_funcs/feed_parse_extractChococatsId.py
@@ -0,0 +1,22 @@
+
+def extractChococatsId(item):
+	'''
+	Parser for 'chococats.id': returns None to skip the item, the built release message on a tag match, else False.
+	'''
+	# Unpack the four values the title helper returns; items with neither chp nor vol, or with "preview" in the title (any case), are skipped.
+	vol, chp, frag, postfix = extractVolChapterFragmentPostfix(item['title'])
+	if not (chp or vol) or "preview" in item['title'].lower():
+		return None
+	# Each row is (tag to look for, name handed to the builder, tl_type); rows are tried in list order.
+	tagmap = [
+		('PRC',       'PRC',                      'translated'),
+		('Loiterous', 'Loiterous',                'oel'),
+	]
+	# The first row whose tag is found in item['tags'] decides the result.
+	for tagname, name, tl_type in tagmap:
+		if tagname in item['tags']:
+			return buildReleaseMessageWithType(item, name, vol, chp, frag=frag, postfix=postfix, tl_type=tl_type)
+
+	# No row matched. NOTE(review): False rather than None here, presumably to tell "unmatched" apart from "skipped" -- confirm with the caller.
+	return False
+	
diff --git a/WebMirror/management/rss_parser_funcs/feed_parse_extractChococatshomeWpcomstagingCom.py b/WebMirror/management/rss_parser_funcs/feed_parse_extractChococatshomeWpcomstagingCom.py
@@ -0,0 +1,22 @@
+
+def extractChococatshomeWpcomstagingCom(item):
+	'''
+	Parser for 'chococatshome.wpcomstaging.com': returns None to skip the item, the built release message on a tag match, else False.
+	'''
+	# Unpack the four values the title helper returns; items with neither chp nor vol, or with "preview" in the title (any case), are skipped.
+	vol, chp, frag, postfix = extractVolChapterFragmentPostfix(item['title'])
+	if not (chp or vol) or "preview" in item['title'].lower():
+		return None
+	# Each row is (tag to look for, name handed to the builder, tl_type); rows are tried in list order.
+	tagmap = [
+		('PRC',       'PRC',                      'translated'),
+		('Loiterous', 'Loiterous',                'oel'),
+	]
+	# The first row whose tag is found in item['tags'] decides the result.
+	for tagname, name, tl_type in tagmap:
+		if tagname in item['tags']:
+			return buildReleaseMessageWithType(item, name, vol, chp, frag=frag, postfix=postfix, tl_type=tl_type)
+
+	# No row matched. NOTE(review): False rather than None here, presumably to tell "unmatched" apart from "skipped" -- confirm with the caller.
+	return False
+	
diff --git a/WebMirror/management/rss_parser_funcs/feed_parse_extractFkshsbfWordpressCom.py b/WebMirror/management/rss_parser_funcs/feed_parse_extractFkshsbfWordpressCom.py
@@ -0,0 +1,22 @@
+
+def extractFkshsbfWordpressCom(item):
+	'''
+	Parser for 'fkshsbf.wordpress.com': returns None to skip the item, the built release message on a tag match, else False.
+	'''
+	# Unpack the four values the title helper returns; items with neither chp nor vol, or with "preview" in the title (any case), are skipped.
+	vol, chp, frag, postfix = extractVolChapterFragmentPostfix(item['title'])
+	if not (chp or vol) or "preview" in item['title'].lower():
+		return None
+	# Each row is (tag to look for, name handed to the builder, tl_type); rows are tried in list order.
+	tagmap = [
+		('PRC',       'PRC',                      'translated'),
+		('Loiterous', 'Loiterous',                'oel'),
+	]
+	# The first row whose tag is found in item['tags'] decides the result.
+	for tagname, name, tl_type in tagmap:
+		if tagname in item['tags']:
+			return buildReleaseMessageWithType(item, name, vol, chp, frag=frag, postfix=postfix, tl_type=tl_type)
+
+	# No row matched. NOTE(review): False rather than None here, presumably to tell "unmatched" apart from "skipped" -- confirm with the caller.
+	return False
+	
diff --git a/WebMirror/management/rss_parser_funcs/feed_parse_extractGingersfairyWixsiteCom.py b/WebMirror/management/rss_parser_funcs/feed_parse_extractGingersfairyWixsiteCom.py
@@ -0,0 +1,22 @@
+
+def extractGingersfairyWixsiteCom(item):
+	'''
+	Parser for 'gingersfairy.wixsite.com': returns None to skip the item, the built release message on a tag match, else False.
+	'''
+	# Unpack the four values the title helper returns; items with neither chp nor vol, or with "preview" in the title (any case), are skipped.
+	vol, chp, frag, postfix = extractVolChapterFragmentPostfix(item['title'])
+	if not (chp or vol) or "preview" in item['title'].lower():
+		return None
+	# Each row is (tag to look for, name handed to the builder, tl_type); rows are tried in list order.
+	tagmap = [
+		('PRC',       'PRC',                      'translated'),
+		('Loiterous', 'Loiterous',                'oel'),
+	]
+	# The first row whose tag is found in item['tags'] decides the result.
+	for tagname, name, tl_type in tagmap:
+		if tagname in item['tags']:
+			return buildReleaseMessageWithType(item, name, vol, chp, frag=frag, postfix=postfix, tl_type=tl_type)
+
+	# No row matched. NOTE(review): False rather than None here, presumably to tell "unmatched" apart from "skipped" -- confirm with the caller.
+	return False
+	
diff --git a/WebMirror/management/rss_parser_funcs/feed_parse_extractNightwaterBar.py b/WebMirror/management/rss_parser_funcs/feed_parse_extractNightwaterBar.py
@@ -0,0 +1,22 @@
+
+def extractNightwaterBar(item):
+	'''
+	Parser for 'nightwater.bar': returns None to skip the item, the built release message on a tag match, else False.
+	'''
+	# Unpack the four values the title helper returns; items with neither chp nor vol, or with "preview" in the title (any case), are skipped.
+	vol, chp, frag, postfix = extractVolChapterFragmentPostfix(item['title'])
+	if not (chp or vol) or "preview" in item['title'].lower():
+		return None
+	# Each row is (tag to look for, name handed to the builder, tl_type); rows are tried in list order.
+	tagmap = [
+		('PRC',       'PRC',                      'translated'),
+		('Loiterous', 'Loiterous',                'oel'),
+	]
+	# The first row whose tag is found in item['tags'] decides the result.
+	for tagname, name, tl_type in tagmap:
+		if tagname in item['tags']:
+			return buildReleaseMessageWithType(item, name, vol, chp, frag=frag, postfix=postfix, tl_type=tl_type)
+
+	# No row matched. NOTE(review): False rather than None here, presumably to tell "unmatched" apart from "skipped" -- confirm with the caller.
+	return False
+	
diff --git a/WebMirror/management/rss_parser_funcs/feed_parse_extractRainytranslationsWpcomstagingCom.py b/WebMirror/management/rss_parser_funcs/feed_parse_extractRainytranslationsWpcomstagingCom.py
@@ -0,0 +1,22 @@
+
+def extractRainytranslationsWpcomstagingCom(item):
+	'''
+	Parser for 'rainytranslations.wpcomstaging.com': returns None to skip the item, the built release message on a tag match, else False.
+	'''
+	# Unpack the four values the title helper returns; items with neither chp nor vol, or with "preview" in the title (any case), are skipped.
+	vol, chp, frag, postfix = extractVolChapterFragmentPostfix(item['title'])
+	if not (chp or vol) or "preview" in item['title'].lower():
+		return None
+	# Each row is (tag to look for, name handed to the builder, tl_type); rows are tried in list order.
+	tagmap = [
+		('PRC',       'PRC',                      'translated'),
+		('Loiterous', 'Loiterous',                'oel'),
+	]
+	# The first row whose tag is found in item['tags'] decides the result.
+	for tagname, name, tl_type in tagmap:
+		if tagname in item['tags']:
+			return buildReleaseMessageWithType(item, name, vol, chp, frag=frag, postfix=postfix, tl_type=tl_type)
+
+	# No row matched. NOTE(review): False rather than None here, presumably to tell "unmatched" apart from "skipped" -- confirm with the caller.
+	return False
+	
diff --git a/WebMirror/management/rss_parser_funcs/feed_parse_extractSassystrawberryCom.py b/WebMirror/management/rss_parser_funcs/feed_parse_extractSassystrawberryCom.py
@@ -0,0 +1,22 @@
+
+def extractSassystrawberryCom(item):
+	'''
+	Parser for 'sassystrawberry.com': returns None to skip the item, the built release message on a tag match, else False.
+	'''
+	# Unpack the four values the title helper returns; items with neither chp nor vol, or with "preview" in the title (any case), are skipped.
+	vol, chp, frag, postfix = extractVolChapterFragmentPostfix(item['title'])
+	if not (chp or vol) or "preview" in item['title'].lower():
+		return None
+	# Each row is (tag to look for, name handed to the builder, tl_type); rows are tried in list order.
+	tagmap = [
+		('PRC',       'PRC',                      'translated'),
+		('Loiterous', 'Loiterous',                'oel'),
+	]
+	# The first row whose tag is found in item['tags'] decides the result.
+	for tagname, name, tl_type in tagmap:
+		if tagname in item['tags']:
+			return buildReleaseMessageWithType(item, name, vol, chp, frag=frag, postfix=postfix, tl_type=tl_type)
+
+	# No row matched. NOTE(review): False rather than None here, presumably to tell "unmatched" apart from "skipped" -- confirm with the caller.
+	return False
+	
diff --git a/common/util/urlFuncs.py b/common/util/urlFuncs.py
@@ -327,6 +327,13 @@ def cleanUrl(urlin):
 			if 'z' in qs:
 				urlin = qs['z'][0]
 
+	if urlin.startswith("https://href.li/?"):
+		# href.li links appear to carry the target URL verbatim after the first '?', so keep only that part.
+		urlin = urlin.split("?", 1)[-1]
+
+
+	assert urlin != None
+
 	# RSS garbage
 	if '#utm_source=' in urlin:
 		urlin = urlin.split("#utm_source=")[0]
@@ -336,6 +343,9 @@ def cleanUrl(urlin):
 
 	ret = unwrap_redirect(urlin, resolve_redirects)
 
+	if ret is None:
+		return ret
+
 	# I hate feedburner
 	if '?utm_source=' in ret:
 		ret = ret.split("?utm_source=")[0]
@@ -551,6 +561,10 @@ def urlClean(url):
 	# Google docs can be accessed with or without the '/preview' postfix
 	# We want to remove this if it's present, so we don't duplicate content.
 	url = trimGDocUrl(url)
+
+
+	assert url != None
+
 	url = cleanUrl(url)
 
 	if url is None:
@@ -579,7 +593,8 @@ def getNetLoc(url):
 	# print(isGFileUrl('https://drive.google.com/folderview?id=0B_mXfd95yvDfQWQ1ajNWZTJFRkk&usp=drive_web'))
 	# print(urlClean('http://inmydaydreams.com/?p=6128&share=tumblr'))
 	# print(urlClean('http://inmydaydreams.com/?p=6091&share=tumblr'))
-	print(urlClean('https://www.tumblr.com/privacy/consent?redirect=https%3a%2f%2ffoxghost.tumblr.com%2fpost%2f190325087867'))
+	print(urlClean('https://href.li/?https://woopread.com/series/your-meaning/'))
+	# print(urlClean('https://www.tumblr.com/privacy/consent?redirect=https%3a%2f%2ffoxghost.tumblr.com%2fpost%2f190325087867'))
 
 	# print(hasDuplicateSegments('http://www.spcnet.tv/forums/showthread.php/23450-i-ve-decided-to-learn-chinese/index/images/misc/image.php?s=1129386e978631b0771a226dba5a82e5&u=65&dateline=1358455669'))
 	# print(hasDuplicateSegments('http://inmydaydreams.com/?p=6091&share=tumblr'))
diff --git a/requirements.txt b/requirements.txt
@@ -67,6 +67,7 @@ git+https://github.com/kvesteri/sqlalchemy-continuum.git
 
 # Probably requires
 # sudo apt-get install libtiff5-dev libjpeg8-dev zlib1g-dev libfreetype6-dev liblcms2-dev libwebp-dev libharfbuzz-dev libfribidi-dev
+
 pillow==8.4.0
 
 # probably requires `sudo apt install libxml2-dev libxslt1-dev`