Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
Expand Down Expand Up @@ -87,3 +86,8 @@ ENV/

# Rope project settings
.ropeproject

# lambda
.lambda
config
*.zip
80 changes: 80 additions & 0 deletions CommunityGraphTwitterCleanLinks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from urllib import urlencode
from urlparse import urlparse, urlunparse, parse_qs
from neo4j.v1 import GraphDatabase, basic_auth
import os

import boto3

from base64 import b64decode

def lambda_handler(event, context):
    """Lambda entry point: pick Neo4j connection settings and clean link URLs.

    The first entry of ``event["resources"]`` decides which graph is updated:
    a name containing ``CommunityGraphTwitterCleanLinksPublic`` selects the
    public graph, ``CommunityGraphTwitterCleanLinksPrivate`` the private one.
    In both cases the password is read from the environment, base64-decoded
    and decrypted through KMS.  Without a matching resource the plain
    ``NEO4J_PASSWORD`` / ``NEO4J_URL`` environment values (or their local
    defaults) are used.

    :param event: triggering event; may be empty or lack ``resources``.
    :param context: Lambda context object (unused).
    """
    print("Event:", event)

    # Fallback settings used when the event names no known resource.
    version_updated = "Default (Updating public graph)"
    NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD', "test")
    NEO4J_URL = os.environ.get('NEO4J_URL', "bolt://localhost")

    resources = event.get("resources") if event else None
    if resources:
        trigger = resources[0]
        password_var = None
        url_var = None
        if "CommunityGraphTwitterCleanLinksPublic" in trigger:
            version_updated = "Updating public graph"
            password_var = 'NEO4J_PASSWORD'
            url_var = 'NEO4J_PUBLIC_URL'
        elif "CommunityGraphTwitterCleanLinksPrivate" in trigger:
            version_updated = "Updating private graph"
            password_var = 'NEO4J_PRIVATE_PASSWORD'
            url_var = 'NEO4J_PRIVATE_URL'

        if password_var is not None:
            # The stored password is a base64-encoded KMS ciphertext.
            encrypted_password = os.environ[password_var]
            kms = boto3.client('kms')
            decrypted = kms.decrypt(CiphertextBlob=b64decode(encrypted_password))
            NEO4J_PASSWORD = decrypted['Plaintext']
            NEO4J_URL = os.environ.get(url_var)

    print(version_updated)
    clean_links(
        neo4jUrl=NEO4J_URL,
        neo4jUser=os.environ.get('NEO4J_USER', "neo4j"),
        neo4jPass=NEO4J_PASSWORD,
    )


def clean_links(neo4jUrl, neo4jUser, neo4jPass):
    """Populate ``cleanUrl`` on every ``Link`` node that does not have one yet.

    Each link's ``url`` property is passed through ``clean_uri`` and the
    results are written back in a single batched ``UNWIND`` statement.
    Links whose ``url`` is empty or missing are skipped.

    :param neo4jUrl: Bolt URL of the Neo4j server.
    :param neo4jUser: Neo4j user name.
    :param neo4jPass: Neo4j password.
    """
    driver = GraphDatabase.driver(neo4jUrl, auth=basic_auth(neo4jUser, neo4jPass))

    query = "MATCH (l:Link) WHERE NOT EXISTS(l.cleanUrl) RETURN l, ID(l) AS internalId"

    updateQuery = """\
UNWIND {updates} AS update
MATCH (l:Link) WHERE ID(l) = update.id
SET l.cleanUrl = update.clean
"""

    session = driver.session()
    try:
        result = session.run(query)

        updates = []
        for row in result:
            uri = row["l"]["url"]
            if uri:
                uri = uri.encode('utf-8')
                updates.append({"id": row["internalId"], "clean": clean_uri(uri)})

        print("Updates to apply", updates)

        update_result = session.run(updateQuery, {"updates": updates})

        print(update_result)
    finally:
        # Release the session even when a query or URL clean-up fails.
        session.close()


def clean_uri(url):
    """Return ``url`` with the ``utm_*`` tracking query parameters removed.

    The query string is parsed into a dict of value lists, the tracking keys
    are dropped, and the remaining parameters are re-encoded (one
    ``key=value`` pair per value) into the otherwise unchanged URL.

    :param url: URL string to clean.
    :returns: the rebuilt URL.
    """
    tracking_params = ("utm_content", "utm_source", "utm_medium", "utm_campaign", "utm_term")

    parts = urlparse(url)
    params = parse_qs(parts.query)

    for name in tracking_params:
        if name in params:
            del params[name]

    cleaned_query = urlencode(params, True)
    return urlunparse(parts._replace(query=cleaned_query))

if __name__ == "__main__":
    # Script entry: take the connection settings straight from the
    # environment (with local defaults) and clean the links once.
    neo4jUrl = os.environ.get('NEO4J_URL', "bolt://localhost")
    neo4jUser = os.environ.get('NEO4J_USER', "neo4j")
    neo4jPass = os.environ.get('NEO4J_PASSWORD', "test")
    clean_links(
        neo4jUrl=neo4jUrl,
        neo4jUser=neo4jUser,
        neo4jPass=neo4jPass,
    )
86 changes: 86 additions & 0 deletions CommunityGraphTwitterHydrateLinks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import socket

import requests
import boto3
import os

from neo4j.v1 import GraphDatabase, basic_auth
from base64 import b64decode
from bs4 import BeautifulSoup, Tag

def lambda_handler(event, context):
    """Lambda entry point: pick Neo4j connection settings and hydrate links.

    The first entry of ``event["resources"]`` decides which graph is updated:
    a name containing ``CommunityGraphTwitterHydrateLinksPublic`` selects the
    public graph, ``CommunityGraphTwitterHydrateLinksPrivate`` the private
    one.  In both cases the password is read from the environment,
    base64-decoded and decrypted through KMS.  Without a matching resource
    the plain ``NEO4J_PASSWORD`` / ``NEO4J_URL`` environment values (or their
    local defaults) are used.

    :param event: triggering event; may be empty or lack ``resources``.
    :param context: Lambda context object (unused).
    """
    print("Event:", event)

    # Fallback settings used when the event names no known resource.
    version_updated = "Default (Updating public graph)"
    NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD', "test")
    NEO4J_URL = os.environ.get('NEO4J_URL', "bolt://localhost")

    resources = event.get("resources") if event else None
    if resources:
        trigger = resources[0]
        password_var = None
        url_var = None
        if "CommunityGraphTwitterHydrateLinksPublic" in trigger:
            version_updated = "Updating public graph"
            password_var = 'NEO4J_PASSWORD'
            url_var = 'NEO4J_PUBLIC_URL'
        elif "CommunityGraphTwitterHydrateLinksPrivate" in trigger:
            version_updated = "Updating private graph"
            password_var = 'NEO4J_PRIVATE_PASSWORD'
            url_var = 'NEO4J_PRIVATE_URL'

        if password_var is not None:
            # The stored password is a base64-encoded KMS ciphertext.
            encrypted_password = os.environ[password_var]
            kms = boto3.client('kms')
            decrypted = kms.decrypt(CiphertextBlob=b64decode(encrypted_password))
            NEO4J_PASSWORD = decrypted['Plaintext']
            NEO4J_URL = os.environ.get(url_var)

    print(version_updated)
    hydrate_links(
        neo4jUrl=NEO4J_URL,
        neo4jUser=os.environ.get('NEO4J_USER', "neo4j"),
        neo4jPass=NEO4J_PASSWORD,
    )


def hydrate_links(neo4jUrl, neo4jUser, neo4jPass):
    """Fetch page titles for up to 100 untitled ``Link`` nodes and store them.

    Links without a ``title`` property are read newest-first (highest
    internal id), each URL is resolved through ``hydrate_url`` and the
    titles are written back in one batched ``UNWIND`` statement.  Links
    whose host cannot be resolved or connected to are logged and left
    untouched.

    :param neo4jUrl: Bolt URL of the Neo4j server.
    :param neo4jUser: Neo4j user name.
    :param neo4jPass: Neo4j password.
    """
    driver = GraphDatabase.driver(neo4jUrl, auth=basic_auth(neo4jUser, neo4jPass))
    session = driver.session()
    try:
        result = session.run(
            "MATCH (link:Link) WHERE not exists(link.title) RETURN id(link) as id, link.url as url ORDER BY ID(link) DESC LIMIT {limit}",
            {"limit": 100})
        update = []
        rows = 0
        for record in result:
            try:
                print(record["url"])
                title = hydrate_url(record["url"])
            except socket.gaierror:
                # Must precede socket.error, of which gaierror is a subclass.
                print("Failed to resolve {0}. Ignoring for now".format(record["url"]))
            except socket.error:
                print("Failed to connect to {0}. Ignoring for now".format(record["url"]))
            else:
                rows += 1
                update.append({"id": record["id"], "title": title})

        print("urls", len(update), "records", rows)
        result = session.run(
            "UNWIND {data} AS row MATCH (link) WHERE id(link) = row.id SET link.title = row.title",
            {"data": update})
        print(result.consume().counters)
    finally:
        # Release the session even when a query or a fetch fails unexpectedly.
        session.close()


def hydrate_url(url, timeout=10):
    """Return the text of the first ``<title>`` element of the page at ``url``.

    ``"N/A"`` is returned when ``url`` is empty, the request fails, or the
    page has no ``<title>`` element.

    :param url: address of the page to fetch; may be empty or ``None``.
    :param timeout: seconds to wait for the server before giving up.  Without
        a timeout a single unresponsive host can block the caller
        indefinitely.
    :returns: the title text, or ``"N/A"``.
    """
    user_agent = {'User-agent': 'Mozilla/5.0'}
    potential_title = []
    try:
        if url:
            r = requests.get(url, headers=user_agent, timeout=timeout)
            page = BeautifulSoup(r.text, "html.parser")
            potential_title = page.find_all("title")
    except requests.exceptions.RequestException:
        # Covers connection errors as well as timeouts, redirect loops and
        # malformed URLs, so one bad link cannot abort the whole batch.
        print("Failed to connect: ", url)

    if not potential_title:
        print("Skipping: ", url)
        return "N/A"
    return potential_title[0].text

if __name__ == "__main__":
    # Script entry: take the connection settings straight from the
    # environment (with local defaults) and hydrate one batch of links.
    neo4jUrl = os.environ.get('NEO4J_URL', "bolt://localhost")
    neo4jUser = os.environ.get('NEO4J_USER', "neo4j")
    neo4jPass = os.environ.get('NEO4J_PASSWORD', "test")
    hydrate_links(
        neo4jUrl=neo4jUrl,
        neo4jUser=neo4jUser,
        neo4jPass=neo4jPass,
    )
42 changes: 42 additions & 0 deletions CommunityGraphTwitterImport.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os

from lib.utils import import_links, decrypt_value


def lambda_handler(event, context):
    """Lambda entry point: pick Neo4j connection settings and import tweets.

    The Twitter bearer token is always taken from the ``TWITTER_BEARER``
    environment variable and passed through ``decrypt_value``.  The first
    entry of ``event["resources"]`` decides which graph is updated: a name
    containing ``CommunityGraphTwitterImportPublic`` selects the public
    graph, ``CommunityGraphTwitterImportPrivate`` the private one, each with
    its own decrypted password and URL.  Without a matching resource the
    plain ``NEO4J_PASSWORD`` / ``NEO4J_URL`` environment values (or their
    local defaults) are used.

    :param event: triggering event; may be empty or lack ``resources``.
    :param context: Lambda context object (unused).
    """
    print("Event:", event)

    # Fallback settings used when the event names no known resource.
    version_updated = "Default (Updating public graph)"
    NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD', "test")
    NEO4J_URL = os.environ.get('NEO4J_URL', "bolt://localhost")

    TWITTER_BEARER = decrypt_value(os.environ['TWITTER_BEARER'])

    resources = event.get("resources") if event else None
    if resources:
        trigger = resources[0]
        password_var = None
        url_var = None
        if "CommunityGraphTwitterImportPublic" in trigger:
            version_updated = "Updating public graph"
            password_var = 'NEO4J_PASSWORD'
            url_var = 'NEO4J_PUBLIC_URL'
        elif "CommunityGraphTwitterImportPrivate" in trigger:
            version_updated = "Updating private graph"
            password_var = 'NEO4J_PRIVATE_PASSWORD'
            url_var = 'NEO4J_PRIVATE_URL'

        if password_var is not None:
            NEO4J_PASSWORD = decrypt_value(os.environ[password_var])
            NEO4J_URL = os.environ.get(url_var)

    neo4jUser = os.environ.get('NEO4J_USER', "neo4j")
    search = os.environ.get("TWITTER_SEARCH")

    print(version_updated)
    import_links(
        neo4jUrl=NEO4J_URL,
        neo4jUser=neo4jUser,
        neo4jPass=NEO4J_PASSWORD,
        bearerToken=TWITTER_BEARER,
        search=search,
    )


if __name__ == "__main__":
    # Script entry: settings come straight from the environment; the bearer
    # token is used as-is (no decryption) and a default search is supplied.
    neo4jUrl = os.environ.get('NEO4J_URL', "bolt://localhost")
    neo4jUser = os.environ.get('NEO4J_USER', "neo4j")
    neo4jPass = os.environ.get('NEO4J_PASSWORD', "test")
    bearerToken = os.environ.get('TWITTER_BEARER', "")
    search = os.environ.get(
        "TWITTER_SEARCH",
        'neo4j OR "graph database" OR "graph databases" OR graphdb OR graphconnect OR @neoquestions OR @Neo4jDE OR @Neo4jFr OR neotechnology',
    )

    import_links(
        neo4jUrl=neo4jUrl,
        neo4jUser=neo4jUser,
        neo4jPass=neo4jPass,
        bearerToken=bearerToken,
        search=search,
    )
138 changes: 138 additions & 0 deletions GenerateSummaryPage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import datetime
import sys
import boto
import flask
import time

from ago import human
from flask import render_template
from neo4j.v1 import GraphDatabase
from datetime import tzinfo, timedelta, datetime

# Zero-length offset shared by the UTC tzinfo methods below.
ZERO = timedelta(0)


class UTC(tzinfo):
    """Fixed ``tzinfo`` for UTC: zero offset, no daylight saving time."""

    def dst(self, dt):
        """Return the daylight-saving adjustment, which is always zero."""
        return ZERO

    def tzname(self, dt):
        """Return the time zone name, ``"UTC"``."""
        return "UTC"

    def utcoffset(self, dt):
        """Return the offset from UTC, which is always zero."""
        return ZERO

# Per-product settings keyed by product name: the Neo4j host and credentials
# to read from, the page title, and the "summary" name that generate_page()
# uses for both the output file and the S3 bucket.
# NOTE(review): hosts and credentials are hard-coded here.
products = dict(
    neo4j=dict(
        url="138.197.15.1",
        user="all",
        password="readonly",
        title="Neo4j",
        summary="twin4j",
    ),
    graphql=dict(
        url="107.170.69.23",
        user="graphql",
        password="graphql",
        title="GraphQL",
        summary="twigraphql",
    ),
)


def github_links(tx):
    """Return repositories updated within the last seven days.

    Repositories created by the users ``neo4j`` and ``neo4j-contrib`` are
    excluded; rows are ordered by most recent update first.

    :param tx: transaction object exposing ``run(query)``.
    :returns: list of result records.
    """
    query = """\
MATCH (n:Repository) WHERE EXISTS(n.created) AND n.updated > timestamp() - 7 * 60 * 60 * 24 * 1000
WITH n
ORDER BY n.updated desc
MATCH (n)<-[:CREATED]-(user) WHERE NOT (user.name IN ["neo4j", "neo4j-contrib"])
RETURN n.title, n.url, n.created, n.favorites, n.updated, user.name, n.created_at, n.updated_at
ORDER BY n.updated desc
"""
    return list(tx.run(query))


def twitter_links(tx):
    """Return links first tweeted within the last seven days, ranked by score.

    Retweets are ignored, tweets are grouped per cleaned URL, and URLs
    containing ``neo4j.com`` are excluded.  A link's score is the sum of
    favorites and retweet counts over its tweets; rows are ordered by score,
    highest first.

    :param tx: transaction object exposing ``run(query)``.
    :returns: list of result records.
    """
    query = """\
WITH ((timestamp() / 1000) - (7 * 24 * 60 * 60)) AS oneWeekAgo
MATCH (l:Link)<--(t:Tweet:Content)
WHERE not(t:Retweet)
WITH oneWeekAgo, l, t
ORDER BY l.cleanUrl, toInteger(t.created)
WITH oneWeekAgo, l.cleanUrl AS url, l.title AS title, collect(t) AS tweets WHERE toInteger(tweets[0].created) is not null AND tweets[0].created > oneWeekAgo AND not url contains "neo4j.com"
RETURN url, title, REDUCE(acc = 0, tweet IN tweets | acc + tweet.favorites + size((tweet)<-[:RETWEETED]-())) AS score, tweets[0].created * 1000 AS dateCreated, [ tweet IN tweets | head([ (tweet)<-[:POSTED]-(user) | user.screen_name]) ] AS users
ORDER BY score DESC
"""
    return list(tx.run(query))


def meetup_events(tx):
    """Return events within seven days either side of now, with their group.

    Rows are ordered by event time, earliest first.

    :param tx: transaction object exposing ``run(query)``.
    :returns: list of result records.
    """
    query = """\
MATCH (event:Event)<-[:CONTAINED]-(group)
WHERE timestamp() + 7 * 60 * 60 * 24 * 1000 > event.time > timestamp() - 7 * 60 * 60 * 24 * 1000
RETURN event, group
ORDER BY event.time
"""
    return list(tx.run(query))

# Flask application object: the template filters are registered on it and
# generate_page() renders the summary page inside its application context.
app = flask.Flask('my app')


@app.template_filter('humanise')
def humanise_filter(value):
    """Template filter: render a millisecond timestamp as a relative time.

    :param value: timestamp in milliseconds.
    :returns: whatever ``human`` produces for that moment at precision 1.
    """
    seconds = value / 1000
    moment = datetime.fromtimestamp(seconds)
    return human(moment, precision=1)


@app.template_filter("shorten")
def shorten_filter(value):
    """Template filter: cut ``value`` to 75 characters and append ``..``.

    Values of 75 characters or fewer are returned unchanged.
    """
    if len(value) <= 75:
        return value
    return value[:75] + '..'


def generate_page(product):
    """Render the weekly summary page for ``product`` and upload it to S3.

    GitHub, Twitter and meetup records are read from the product's Neo4j
    server, rendered through the ``index.html`` template, written to
    ``/tmp/<summary>.html`` and uploaded to the S3 bucket named after
    ``product["summary"]`` under the key ``<summary>.html``.

    :param product: settings dict with the keys ``url``, ``user``,
        ``password``, ``title`` and ``summary``.
    """
    driver = GraphDatabase.driver("bolt://{0}:7687".format(product["url"]), auth=(product["user"], product["password"]))
    with driver.session() as session:
        github_records = session.read_transaction(github_links)
        twitter_records = session.read_transaction(twitter_links)
        meetup_records = session.read_transaction(meetup_events)

    with app.app_context():
        time_now = str(datetime.now(UTC()))

        rendered = render_template('index.html',
                                   github_records=github_records,
                                   twitter_records=twitter_records,
                                   meetup_records=meetup_records,
                                   title=product["title"],
                                   time_now=time_now)

    local_file_name = "/tmp/{0}.html".format(product["summary"])
    # The page is encoded to UTF-8 bytes, so the file must be opened in
    # binary mode; text mode rejects bytes on Python 3.
    with open(local_file_name, "wb") as page_file:
        page_file.write(rendered.encode('utf-8'))

    s3_connection = boto.connect_s3()
    bucket = s3_connection.get_bucket(product["summary"])
    key = boto.s3.key.Key(bucket, "{0}.html".format(product["summary"]))
    key.set_contents_from_filename(local_file_name)


def lambda_handler(event, context):
    """Lambda entry point: generate the summary page for one product.

    The product defaults to ``neo4j``.  When the first entry of
    ``event["resources"]`` contains ``GraphQLGenerateSummaryPage`` the
    ``graphql`` product is used; ``Neo4jGenerateSummaryPage`` selects
    ``neo4j`` and takes precedence if both names are present.

    :param event: triggering event; may be empty or lack ``resources``.
    :param context: Lambda context object (unused).
    """
    print("Event:", event)

    product_name = "neo4j"
    if event and event.get("resources"):
        trigger = event["resources"][0]
        if "GraphQLGenerateSummaryPage" in trigger:
            product_name = "graphql"
        if "Neo4jGenerateSummaryPage" in trigger:
            # Must match a key of ``products``; the former value "neo4js"
            # raised KeyError on lookup.
            product_name = "neo4j"

    generate_page(products[product_name])


if __name__ == "__main__":
    # Script entry: the optional first command-line argument names the
    # product; without it the neo4j page is generated.
    args = sys.argv[1:]
    if args:
        product_name = args[0]
    else:
        product_name = "neo4j"
    generate_page(products[product_name])
Loading