Function for visualizing a Twitter graph
by Olof Björck, Joakim Johansson, Rania Sahioun, Raazesh Sainudiin and Ivan Sadikov
This is part of Project MEP: Meme Evolution Programme, and is supported by Databricks, AWS and a Swedish VR grant.
Copyright 2016-2020 Olof Björck, Joakim Johansson, Rania Sahioun, Ivan Sadikov and Raazesh Sainudiin
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
/** Displays an interactive visualization of a Twitter retweet graph.
  *
  * This function takes a DataFrame from TTTDFfunctions function tweetsDF2TTTDF()
  * and displays it in an interactive visualization. The DataFrame content should
  * be a collection of Twitter users.
  *
  * Columns read from `TTTDF`: `CPostUserID`, `CPostUserSN`, `TweetType`,
  * `OPostUserSNinRT`, `OPostUserIdinRT`, `CPostUserId` and `Weight`. Only rows
  * whose `TweetType` equals `"ReTweet"` contribute to the graph.
  *
  * Visualization description:
  *
  * This is a graph visualization of a Twitter graph. Each circle represents a
  * Twitter user (that is, a unique Twitter account). The radius of the user circle
  * represents how many Retweets the user has within this specific Twitter network
  * from small radius (fewest Retweets) to large radius (most Retweets). If you
  * hover a user circle, the user screen name (as specified in the data) will appear
  * by the user circle. The line thickness connecting user circles represents the
  * summed Retweet weight between the two users of a (source, target) tuple.
  * Connecting lines only appear if the user tuple has at least
  * `tupleWeightCutOff` Retweets. Clicking a circle asks the Twitter widget to load
  * that user's timeline next to the graph.
  *
  * Side effects: both the node and the link tables are collected to the driver
  * (no size check is performed) and the page is rendered through `displayHTML`,
  * which is not defined in this file (presumably the Databricks notebook helper).
  *
  * @param TTTDF A DataFrame from TTTDFfunctions function tweetsDF2TTTDF() containing
  *              the data to be displayed.
  * @param tupleWeightCutOff The minimum summed Retweet weight a (source, target)
  *                          user tuple must have for its connecting line to be drawn.
  * @param width The display width in pixels.
  * @param height The display height in pixels.
  */
def visualizeGraph(TTTDF: DataFrame, tupleWeightCutOff: Int = 15, width: Int = 900, height: Int = 400): Unit = {

  // Get user info from the "primary" users in our network. They are the users we've specified.
  // Only the (id, screen name) pairs are needed downstream, so a plain distinct is enough.
  val usersInfo = TTTDF
    .select("CPostUserID", "CPostUserSN")
    .distinct()
    .toDF("id", "ScreenName")

  // Get all retweets and their weights (weights == 1 as we haven't done anything yet)
  // `col` (from functions._) is used instead of the $-interpolator so the function does
  // not depend on spark.implicits._ being in scope at the definition site.
  val RTNetworkDF = TTTDF
    .filter(col("TweetType") === "ReTweet") // TweetType PARAM
    .select("OPostUserSNinRT", "CPostUserSN", "OPostUserIdinRT", "CPostUserId", "Weight")

  // Get all (directed) retweet tuples // PARAM - case by case depending on the TweetType PARAM since SN RT QT etc are Akin's TTTDF SQL names
  val weightedRTNetworkDF = RTNetworkDF
    .groupBy("OPostUserSNinRT", "CPostUserSN", "OPostUserIdinRT", "CPostUserId")
    .agg(sum("Weight").as("Weight"))
    .orderBy(col("Weight").desc)
    .toDF("source", "target", "sourceId", "targetId", "weight")

  // Get the out degree of each user. That is, get only the number of times a user is retweeted. We're now losing the tuples.
  val outDegreeOfOPostUserIdinRTNetwork = weightedRTNetworkDF
    .select("sourceId", "weight")
    .groupBy("sourceId")
    .agg(sum("weight").as("weight"))
    .orderBy(col("weight").desc)
    .toDF("id", "weight")

  // Get the nodes. After the rename, `idNr` holds the numeric user id and `id` holds the
  // screen name; the JavaScript below uses `id` as the d3 node key and tooltip text.
  val nodes = usersInfo
    .join(outDegreeOfOPostUserIdinRTNetwork, Seq("id"))
    .withColumn("group", lit(1))
    .toDF("idNr", "id", "weight", "group")
    .where("id is not null")

  // Get links: keep only retweet tuples whose source AND target are both nodes.
  val linksSource = nodes.select("idNr")
    .join(weightedRTNetworkDF, col("idNr") === col("sourceId"), "left_outer")
    .select("source", "target", "sourceId", "targetId", "weight")

  val links = nodes.select("idNr")
    .join(linksSource, col("idNr") === col("targetId"), "left_outer")
    .select("source", "target", "weight")
    .where("source is not null") // drops the unmatched rows produced by the outer joins
    .where(col("weight") >= tupleWeightCutOff)

  // Get JSON data.
  // The user id is serialized as a string: Twitter ids are 64-bit and can exceed
  // JavaScript's 2^53 safe-integer range, so a JSON number would be silently rounded in
  // the browser and the timeline widget would be asked for the wrong account.
  // NOTE(review): assumes the Twitter timeline widget accepts `userId` as a string - confirm.
  val nodesData = nodes
    .withColumn("idNr", col("idNr").cast("string"))
    .toJSON
    .collect()
  val linksData = links.toJSON.collect()

  // CSS code
  val visualizeGraphCSS: String =
    s"""
/*
General styling
*/
body {
font-family: Sans-Serif;
width: 100%;
height: 100%;
margin: 0;
margin-bottom: 100px;
background-color: #FFF;
overflow: scroll;
}
/*
Page content
*/
div.start {
text-align: center;
}
ul.start {
margin-top: 0;
display: inline-block;
text-align: left;
}
div.text {
text-align: left;
margin-left: 19.1%;
margin-right: 19.1%;
}
/*
For visualizations
*/
.visualization {
display: block;
position: relative;
align: center;
}
.hidden {
visibility: hidden;
}
.visible {
visibility: visible;
}
.zoom {
cursor: move;
fill: none;
pointer-events: fill;
}
#graphVisualizationDiv {
width: auto;
height: auto;
position: relative;
align: center;
}
.links line {
stroke: #999;
stroke-opacity: 1;
}
.nodes circle {
fill: black;
}
circle.clicked {
fill: #1da1f2;
}
#tweet {
z-index: 100;
position: absolute;
left: 80%;
height: auto;
}
"""

  // JavaScript code
  val visualizeGraphJS: String =
    s"""
/*******************************************************************************
This visualisation is a Twitter graph.
*******************************************************************************/
var circleEnlargeConstant = 2;
var circleClickedStrokeWidth = 5;
var maxRadius = 10;
// Specify display sizes.
var width = ${width},
height = ${height},
margin = {
top: 0.1 * height,
right: 0 * width,
bottom: 0.1 * height,
left: 0 * width
};
width = width - margin.left - margin.right;
height = height - margin.top - margin.bottom;
// Get div
var div = d3.select("#graphVisualizationDiv");
// Create svg
var svg = div.append("svg")
.attr("class", "visualization")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.append("g")
.attr("transform",
"translate(" + margin.left + "," + margin.top + ")");
// Create zoom
var zoom = d3.zoom()
.on("zoom", zoomed);
// Create zoomable area
var zoomView = svg.append("rect")
.attr("class", "zoom")
.attr("width", width)
.attr("height", height)
.on("click", clickView)
.call(zoom)
// Create simulation
var simulation = d3.forceSimulation()
.force("charge", d3.forceManyBody().strength(-10))
.force("center", d3.forceCenter(width / 2, height / 2))
// Create loading text
var loading = svg.append("text")
.attr("y", height / 2)
.attr("x", width / 2)
.attr("text-anchor", "middle")
.text("Loading graph... Takes a couple of seconds");
var nodes = ${nodesData.mkString("[", ",\n", "]")};
var links = ${linksData.mkString("[", ",\n", "]")};
// Create links
var link = svg.append("g")
.attr("class", "links")
.selectAll("line")
.data(links)
.enter().append("line")
.attr("stroke-width", function(d) { return Math.sqrt(d.weight / 1000); });
// Create nodes
var node = svg.append("g")
.attr("class", "nodes")
.selectAll("circle")
.data(nodes)
.enter().append("circle")
//.attr("fill", function(d) { return color(d.group); })
.attr("r", function(d) { return Math.sqrt(d.weight / 100) + 2 })
.on("mouseover", mouseoverCircle)
.on("mouseout", mouseoutCircle)
.on("click", clickCircle)
.call(d3.drag()
.on("start", dragstarted)
.on("drag", dragged)
.on("end", dragended));
// Add title as child to circle
node.append("title")
.text(function(d) { return d.id; });
// Link nodes and links to the simulation
simulation
.nodes(nodes)
.on("tick", ticked)
.force('link', d3.forceLink(links).id(function(d) { return d.id; }));
// Updates for each simulation tick
function ticked() {
link
.attr("x1", function(d) { return d.source.x; })
.attr("y1", function(d) { return d.source.y; })
.attr("x2", function(d) { return d.target.x; })
.attr("y2", function(d) { return d.target.y; });
node
.attr("cx", function(d) { return d.x; })
.attr("cy", function(d) { return d.y; })
}
// Compute several steps before rendering
loading.remove(); // Remove loading text
for (var i = 0, n = 150; i < n; ++i) {
simulation.tick();
}
/**
* Handle mouse hover on circle. Enlarge circle.
*/
function mouseoverCircle() {
// Get circle
var circle = d3.select(this);
// Display activated circle
circle.attr("r", circle.attr("r") * circleEnlargeConstant);
}
/**
* Handle mouse out on circle. Resize circle.
*/
function mouseoutCircle() {
// Get circle
var circle = d3.select(this);
// Display idle circle
circle.attr("r", circle.attr("r") / circleEnlargeConstant);
}
/**
* Handle circle drag start.
*/
function dragstarted(d) {
if (!d3.event.active) simulation.alphaTarget(0.3).restart();
d.fx = d.x;
d.fy = d.y;
}
/**
* Handle circle drag.
*/
function dragged(d) {
d.fx = d3.event.x;
d.fy = d3.event.y;
}
/**
* Handle circle drag end.
*/
function dragended(d) {
if (!d3.event.active) simulation.alphaTarget(0);
d.fx = null;
d.fy = null;
}
/**
* Handle zoom. Zoom both x-axis and y-axis.
*/
function zoomed() {
d3.selectAll(".nodes").attr("transform", d3.event.transform)
d3.selectAll(".links").attr("transform", d3.event.transform)
}
/**
* Handle click on zoomable area. That is, handle click outside a node which
* is considered a deselecting click => deselect previously clicked node
* and remove displayed tweets.
*/
function clickView() {
// Remove clicked status on clicked nodes
d3.selectAll(".clicked")
.attr("stroke-width", "0")
.classed("clicked", false)
// Remove timeline
document.getElementById("tweet").innerHTML = ""
}
/**
* Handle click on a tweet circle. Display the clicked tweet and let the tweet
* appear selected by adding a stroke to it.
*/
function clickCircle(d) {
// Remove results from old click
clickView();
// Add stroke width and set clicked class
d3.select(this)
.attr("stroke-width", circleClickedStrokeWidth)
.classed("clicked", true);
// Display tweet
twttr.widgets.createTimeline(
{
sourceType: "profile",
userId: d.idNr
},
document.getElementById("tweet"), // Tweet div
{
height: height
}
)
}
"""

  // HTML code: a single page that inlines the CSS and JS built above and loads the
  // Twitter widget script and d3 v4 from their public CDNs.
  displayHTML(s"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="author" content="Olof Björck">
<title>Graph</title>
<style>
${visualizeGraphCSS}
</style>
</head>
<body>
<div id="graphVisualizationDiv" class="visualization" align="center">
<div id="tweet"></div>
</div>
<script sync src="https://platform.twitter.com/widgets.js"></script>
<script src="https://d3js.org/d3.v4.min.js"></script>
<script>
${visualizeGraphJS}
</script>
</body>
</html>
""")
}
// Usage notice printed once when this cell is evaluated: warns that visualizeGraph
// collects to the driver without any size check, and shows the basic call.
// The leading and trailing empty entries reproduce the blank first/last line of the notice.
println(
  Seq(
    "",
    "*** WARNING: ***",
    "THERE IS NO SIZE CHECKING! Driver might crash as DataFrame.collect is used.",
    "Also, the display can only handle so and so many items at once. Too large",
    "dataset and the visualization might be very laggy or crash.",
    "*** USAGE: ***",
    "visualizeGraph(TTTDF)",
    "****************",
    ""
  ).mkString("\n")
)
*** WARNING: ***
THERE IS NO SIZE CHECKING! Driver might crash as DataFrame.collect is used.
Also, the display can only handle so and so many items at once. Too large
dataset and the visualization might be very laggy or crash.
*** USAGE: ***
visualizeGraph(TTTDF)
****************
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
visualizeGraph: (TTTDF: org.apache.spark.sql.DataFrame, tupleWeightCutOff: Int, width: Int, height: Int)Unit