# icamp-blog-network.R

# Uploaded by: fwild

              
library(network)
library(sna)

# Load the raw feed traces, normalise action labels, user names and channel
# URIs, and collect the blog paths that are referenced on the course server.
# NOTE(review): setwd() changes the working directory for the rest of the
# session; the relative read of "icamp-blog-users2.csv" below relies on it.
setwd("~/public/data")

traces <- read.csv("~/public/data/icamp-blog-network.csv")

# Action labels: every label that merely starts with "sent notification" or
# "sent advertisement" is collapsed to that bare prefix, so rows can later be
# selected by exact label.
t <- as.character(traces[, "feed_action"])
t[which(startsWith(t, "sent notification"))] <- "sent notification"
t[which(startsWith(t, "sent advertisement"))] <- "sent advertisement"

# cleaning: lower-case user names; strip whitespace from and lower-case URIs
traces[["feed_local_user"]] <- tolower(traces[["feed_local_user"]])
traces[["feed_remote_channel_uri"]] <- tolower(
  gsub("[[:space:]]", "", traces[["feed_remote_channel_uri"]])
)

# Two scheme-less spellings of the trial blog are mapped to its canonical URL.
traces[
  which(traces[["feed_remote_channel_uri"]] %in% c(
    "blog.course.isikun.edu.tr/icamptrial2",
    "blog.course.isikun.edu.tr/icamptrial2/"
  )),
  "feed_remote_channel_uri"
] <- "http://blog.course.isikun.edu.tr/icamptrial2"
#names(table(traces[,"feed_remote_channel_uri"]))

# extract paths from remote channel uri (only URIs on this server)
paths <- as.character(traces[["feed_remote_channel_uri"]])
paths <- paths[which(startsWith(paths, "http://blog.course.isikun.edu.tr/"))]
paths <- names(table(paths))
# Drop the 33-character server prefix, then keep only the leading run of
# alphanumerics, dots, underscores and dashes (the first path segment).
paths <- substr(paths, 34, nchar(paths))
paths <- gsub("([[:alnum:]\\.\\_\\-]*)\\/?.*", "\\1", paths)
paths <- names(table(paths))

# Reverse lookup table: blog url / path -> user name.
# The user export contains a lot of fake (= spam) users.
u <- read.csv("icamp-blog-users2.csv", sep = ";")

# NOTE(review): "http:///" has three slashes, which looks like a typo for
# "http://"; the resulting url column is not used anywhere in the visible
# script, so it is left untouched here -- confirm before relying on it.
p <- paste0("http:///", u[, "domain"], u[, "path"])
# Drop the last character of each url (presumably a trailing slash -- verify
# against the export).
p <- substr(p, 1, nchar(p) - 1)

# The path column loses its first and last character, leaving the bare name.
p2 <- as.character(u[, "path"])
u <- cbind(
  url = p,
  path = substr(p2, 2, nchar(p2) - 1),
  user = as.character(u[, "user"])
)

# Users that appear as the local user of at least one trace row; the empty
# user name is dropped.
users <- setdiff(names(table(traces[, "feed_local_user"])), "")

# Add users that only show up through a referenced blog path and never in the
# user field.
pathusers <- u[which(u[, "path"] %in% paths), "user"]
users <- union(users, pathusers)

# Paths that were referenced in the traces but have no entry in the user table.
lost <- setdiff(paths, u[, "path"])
pathusers[!(pathusers %in% users)] # should be none
lost[lost %in% users] # are the ones that have traces, but are no longer in the userlist

# Deleted blogs are included as users under their path name; comment this
# line out to leave them out of the user list.
users <- union(users, lost)

# -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  
# create the interaction matrix

# Square user-by-user interaction count matrix, initialised to zero and
# labelled with the user names on both axes.
u2u <- matrix(
  0,
  nrow = length(users),
  ncol = length(users),
  dimnames = list(users, users)
)





# -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  
# offers

# Successful subscription offers: each one adds 1 to the cell
# u2u[local user, owner of the remote blog].
offers <- traces[which(t == "sent subscription offer"), ]
nrow(offers)
successful <- offers[which(offers[, "description"] == "Success"), ]

# seq_len() gives an empty sequence when nothing matched; 1:nrow() would
# iterate over c(1, 0) and fail on the first lookup.
for (i in seq_len(nrow(successful))) {

  from <- as.character(successful[i, "feed_local_user"])
  to <- as.character(successful[i, "feed_remote_channel_uri"])

  # The empty user name was removed from `users`, so it is not a row of u2u;
  # skip such rows as the notification, reply and advertisement loops do.
  if (from == "") next

  if (substr(to, 1, 33) == "http://blog.course.isikun.edu.tr/") {
    # Reduce the URI to its first path segment (same rule as for `paths`).
    to <- substr(to, 34, nchar(to))
    to <- gsub("([[:alnum:]\\.\\_\\-]*)\\/?.*", "\\1", to)
    if (!to %in% lost) {
      # Known blog: translate the path into its user name.
      to <- u[which(u[, "path"] == to), "user"]
    } else {
      # Deleted blog: the path name itself serves as the user name.
      print(paste("   lost user",to," included"))
      # uncomment this to exclude lost users
      # next()
    }
    u2u[from, to] <- u2u[from, to] + 1

  } else {
    print("different domain")
  }

} # for successful offers

# -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  
# requests

# "requested subscription" => outgoing call for feedback.request
# Successful subscription requests: each one adds 1 to the cell
# u2u[local user, owner of the remote blog].
requests <- traces[which(t == "requested subscription"), ]
nrow(requests)

# keep only the requests whose description is "Success"
successful <- requests[which(requests[, "description"] == "Success"), ]
nrow(successful)

# seq_len() gives an empty sequence when nothing matched; 1:nrow() would
# iterate over c(1, 0) and fail on the first lookup.
for (i in seq_len(nrow(successful))) {

  from <- as.character(successful[i, "feed_local_user"])
  to <- as.character(successful[i, "feed_remote_channel_uri"])

  # The empty user name was removed from `users`, so it is not a row of u2u;
  # skip such rows as the notification, reply and advertisement loops do.
  if (from == "") next

  if (substr(to, 1, 33) == "http://blog.course.isikun.edu.tr/") {
    # Reduce the URI to its first path segment (same rule as for `paths`).
    to <- substr(to, 34, nchar(to))
    to <- gsub("([[:alnum:]\\.\\_\\-]*)\\/?.*", "\\1", to)
    if (!to %in% lost) {
      # Known blog: translate the path into its user name.
      to <- u[which(u[, "path"] == to), "user"]
    } else {
      # Deleted blog: the path name itself serves as the user name.
      print(paste("   lost user",to," included"))
      # uncomment this to exclude lost users
      # next()
    }
    u2u[from, to] <- u2u[from, to] + 1

  } else {
    print("different domain")
  }

} # subscription requests


# -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  
# notifications

# "received notification" => incoming call of feedback.notify

# Successful incoming notifications. The direction is reversed compared with
# the other loops: the count goes into u2u[remote blog owner, local user].
notifies <- traces[which(t == "received notification"), ]
nrow(notifies)

# keep only the notifications whose description is "Success"
successful <- notifies[which(notifies[, "description"] == "Success"), ]
nrow(successful)

# Collects the URIs of notifications that came from a different server.
w <- NULL
# seq_len() gives an empty sequence when nothing matched; 1:nrow() would
# iterate over c(1, 0) and fail on the first lookup.
for (i in seq_len(nrow(successful))) {

  from <- as.character(successful[i, "feed_local_user"])
  to <- as.character(successful[i, "feed_remote_channel_uri"])

  # Rows without a local user cannot be placed in the matrix.
  if (from == "") next

  if (substr(to, 1, 33) == "http://blog.course.isikun.edu.tr/") {
    # Reduce the URI to its first path segment (same rule as for `paths`).
    to <- substr(to, 34, nchar(to))
    to <- gsub("([[:alnum:]\\.\\_\\-]*)\\/?.*", "\\1", to)
    if (!to %in% lost) {
      # Known blog: translate the path into its user name.
      to <- u[which(u[, "path"] == to), "user"]
    } else {
      # Deleted blog: the path name itself serves as the user name.
      print(paste("   lost user",to," included"))
      # uncomment this to exclude lost users
      # next()
    }
    u2u[to, from] <- u2u[to, from] + 1

  } else {
    print("different domain")
    w <- c(w, to)
  }

} # notifies

# number of notifications skipped because they came from a different domain:
length(w)

# -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  
# replies

# "replied" => user clicked on reply for a posting

# Replies: each one adds 1 to the cell u2u[local user, owner of the remote blog].
replies <- traces[which(t == "replied"), ]
nrow(replies)

# Unlike the other interaction types, replies are not filtered by their
# description; every reply row is counted.
successful <- replies
nrow(successful)

# seq_len() gives an empty sequence when nothing matched; 1:nrow() would
# iterate over c(1, 0) and fail on the first lookup.
for (i in seq_len(nrow(successful))) {

  from <- as.character(successful[i, "feed_local_user"])
  to <- as.character(successful[i, "feed_remote_channel_uri"])

  # Rows without a local user cannot be placed in the matrix.
  if (from == "") next

  if (substr(to, 1, 33) == "http://blog.course.isikun.edu.tr/") {
    # Reduce the URI to its first path segment (same rule as for `paths`).
    to <- substr(to, 34, nchar(to))
    to <- gsub("([[:alnum:]\\.\\_\\-]*)\\/?.*", "\\1", to)
    if (!to %in% lost) {
      # Known blog: translate the path into its user name.
      to <- u[which(u[, "path"] == to), "user"]
    } else {
      # Deleted blog: the path name itself serves as the user name.
      print(paste("   lost user",to," included"))
      # uncomment this to exclude lost users
      # next()
    }
    u2u[from, to] <- u2u[from, to] + 1

  } else {
    print("different domain")
  }

} # replies

# -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  
# advertisements

# "sent advertisement" => outgoing call for feedback.offer *with* offered url not being the sender 

# Successful advertisements: each one adds 1 to the cell
# u2u[local user, owner of the remote blog].
ads <- traces[which(t == "sent advertisement"), ]
nrow(ads)

# keep only the advertisements whose description is "Success"
successful <- ads[which(ads[, "description"] == "Success"), ]
nrow(successful)

# seq_len() gives an empty sequence when nothing matched; 1:nrow() would
# iterate over c(1, 0) and fail on the first lookup.
for (i in seq_len(nrow(successful))) {

  from <- as.character(successful[i, "feed_local_user"])
  to <- as.character(successful[i, "feed_remote_channel_uri"])

  # Rows without a local user cannot be placed in the matrix.
  if (from == "") next

  if (substr(to, 1, 33) == "http://blog.course.isikun.edu.tr/") {
    # Reduce the URI to its first path segment (same rule as for `paths`).
    to <- substr(to, 34, nchar(to))
    to <- gsub("([[:alnum:]\\.\\_\\-]*)\\/?.*", "\\1", to)
    if (!to %in% lost) {
      # Known blog: translate the path into its user name.
      to <- u[which(u[, "path"] == to), "user"]
    } else {
      # Deleted blog: the path name itself serves as the user name.
      print(paste("   lost user",to," included"))
      # uncomment this to exclude lost users
      # next()
    }
    u2u[from, to] <- u2u[from, to] + 1

  } else {
    print("different domain")
  }

} # advertisements


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
# visualize

#' Map interaction counts to grey levels (higher count = darker).
#'
#' Counts above `mmax` are capped at `mmax`; counts and cap are rounded up to
#' whole numbers before scaling, so a count equal to the cap maps to black.
#'
#' (An earlier definition with a hard-coded cap of 10 used to precede this one;
#' it was overwritten immediately and never took effect, so it was removed.)
#'
#' @param m Numeric vector or matrix of counts.
#' @param mmax Cap applied to `m` before scaling.
#' @return Character vector of colours as produced by `grey()`.
citgrey <- function(m, mmax) {
  over <- which(m > mmax)
  if (length(over) > 0) {
    m[over] <- mmax
  }
  level <- 1 - ((ceiling(m) + 1) / (ceiling(mmax) + 1))
  grey(level)
}

# Colour palette; only the first entry is used by the plot below.
cs <- c(
  "#FF9900", "#8AD71B", "#FFCC00", "#6ABFF5", "#8E4CE8", "#D02D2D", "#2AB8BD",
  "#BDA32A", "#1B2B8B", "#6A8B1B", "#1B728B", "#5A1B8B", "#828282", "#75A982"
)

# clean users: accounts to drop from the network
delusers <- c(
  "privatcams266", "ssssuussliklichnostj3", "srrrringones3", "livesexcam7892",
  "eringtooonnn3", "freepiceatpussy9281", "baldbigpussy6619",
  "mmmringlonermol3", "allliibbaaabbaa3", "personfinder1438", "wewwfwfwdfwee3",
  "bigronalddrozden3", "djromeofleish3", "asfasfaafaf3", "adanti",
  "facebooklayouts", "affiliateguidesneo", "lauramcfarland", "mikestewart4352",
  "2moons680", "kelvirajas7314", "wowgold16o1"
)

# A second group of user names that is removed as well.
userssp <- c(
  "alex", "alvaro", "estrella", "hector", "jsaez", "pokesalad", "rafaelsf80",
  "rdelgado", "saez100", "vatemu", "paul"
)
delusers <- c(delusers, userssp)

# Keep only the rows and columns of users that are not on the removal list.
u2u1 <- u2u[!(users %in% delusers), !(users %in% delusers)]

# Shorten two long user names on both axes.
rownames(u2u1)[rownames(u2u1) == "sebastianfiedler"] <- "sebastian"
colnames(u2u1)[colnames(u2u1) == "sebastianfiedler"] <- "sebastian"
rownames(u2u1)[rownames(u2u1) == "kaipata"] <- "kai"
colnames(u2u1)[colnames(u2u1) == "kaipata"] <- "kai"


save(u2u1, file = "~/public/data/icamp-blog-network.rda")


# Build the directed network from the cleaned interaction matrix and write
# the plot to a PDF file.
net <- network(u2u1, directed = TRUE)

# Vertex size: log of the rounded prestige score, rounded up; -Inf (from
# log(0)) is reset to 0, then everything is shifted and halved.
v2 <- ceiling(log(round(prestige(u2u1))))
v2[which(is.infinite(v2))] <- 0
v2 <- (v2 + 1) / 2
names(v2) <- rownames(u2u1)

#filter out isolates for label.cex (bug in plot.network!):
# isolates = users whose row sum and column sum are both zero
re <- names(which(rowSums(u2u1) == 0))
ce <- names(which(colSums(u2u1) == 0))
e <- ce[ce %in% re]
v3 <- v2[!(names(v2) %in% e)]
v3[which(v3 == 0.5)] <- 1 # raise the smallest label size

pdf(file = "~/public/gallery/icamp-net-trial2.pdf")
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, TRUE and FALSE are reserved words.
plot(net,
     usearrows = TRUE,
     # edge shade is capped at the floored mean of the non-zero counts
     edge.col = citgrey(u2u1, floor(mean(u2u1[which(u2u1 != 0)]))),
     vertex.col = cs[1], vertex.cex = v2,
     displaylabels = TRUE, boxed.labels = FALSE, label.pad = 0.2,
     displayisolates = FALSE, arrowhead.cex = 0.4, vertex.sides = 40,
     usecurve = TRUE, edge.curve = .03, edge.steps = 100, loop.steps = 100,
     label.cex = v3 / 3 # label.cex=0.5,
)
dev.off()


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
# ALL STATUS CODES
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

# "sent subscription offer" => outgoing call of feedback.offer
# "rejected offer" => user clicked on "reject" with an incoming offer
# "accepted offer" => user has clicked on "accept"
# "received feedback offer" => an offer has been sent and it was received by the opposite party 
#      (= same for users in the system as "sent subscription offer" with description = "Success")
# "received feedback request"=> incoming call of feedback.request

# "requested subscription" => outgoing call for feedback.request

# "replied" => user clicked on reply for a posting
# "received notification" => incoming call of feedback.notify
# "sent notification" => outgoing call of feedback.notify

# "sent advertisement" => outgoing call for feedback.offer *with* offered url not being the sender 
#                         url (e.g. I advertise the greenpeace blog through my blog)?

# "deleted subscriber" => user has clicked on cancel on one of his subscribers, so no more notifies will be sent
# "deleted subscription" => user has cancelled a subscription: notifications that pour-in for a certain feed will not be processed anymore

# "activated pugin" => has clicked on plugin->activate
# "deactivated plugin" => has clicked on plugin->deactivate

# "blogroll" => user clicked on "make this subscription visible in my blogroll"
# "changed description" => user renamed the channel name

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -