Monday, 25 January 2016
A quick hack of the Domino Data Lab post: http://blog.dominodatalab.com/geographic-visualization-with-rs-ggmaps/
I sincerely thought I'd found the bug, until I scrolled down through their visitor comments and saw that misiti (https://disqus.com/by/misiti3780/) had already pointed it out earlier. Well, here is the post anyway :)
My results are different because I have plotted mydata$OT.Number (rather than MV.Number).
# Plot per-state accident figures as red circles on a Google map of the USA.
#
# geocode(), get_googlemap() and ggmap() come from ggmap; aes(), geom_point()
# and the scale_* functions come from ggplot2. Load both explicitly so the
# script runs in a fresh session.
library(ggplot2)
library(ggmap)
# NOTE(review): recent ggmap releases need a Google API key registered with
# register_google() before geocode()/get_googlemap() will answer -- confirm
# against the installed version.

mydata <- read.csv("vehicle-accidents.csv")
mydata$State <- as.character(mydata$State)
mydata$MV.Number <- as.numeric(mydata$MV.Number)
# Alaska and Hawaii are excluded from the plot. %in% never yields NA, so a
# row with a missing state is kept as-is instead of becoming an all-NA row.
mydata <- mydata[!(mydata$State %in% c("Alaska", "Hawaii")), ]

# Geocode the first column (presumably State, converted to character above --
# verify against the CSV). geocode() accepts a whole character vector, so a
# single call replaces the former row-by-row loop, whose 1:nrow(mydata)
# would have iterated over c(1, 0) on an empty data frame.
coords <- geocode(mydata[[1]])
mydata$lon <- as.numeric(coords$lon)
mydata$lat <- as.numeric(coords$lat)

# MV Number to OT Number: the OT.Number column is what gets plotted.
circle_scale_amt <- 0.75
mv_num_collisions <- data.frame(
  collisions = mydata$OT.Number,
  lon = mydata$lon,
  lat = mydata$lat
)
# Pre-compute the point size as a column so it stays aligned with its row
# (e.g. when rows with failed geocodes are dropped by ggplot2).
mv_num_collisions$point_size <- mv_num_collisions$collisions * circle_scale_amt

usa_center <- as.numeric(geocode("United States"))

USAMap <- ggmap(
  get_googlemap(center = usa_center, scale = 2, zoom = 4),
  extent = "normal"
)
# Debug dump of the ggplot object's structure.
str(USAMap)

# Size is now a mapped aesthetic. scale_size_identity() uses the computed
# values directly as point sizes, i.e. the same sizes the original passed as
# a constant vector. The previous scale_size_continuous(range = range(...))
# had no effect because size was not mapped, and its `range` argument is the
# output size range, not the data range.
USAMap +
  geom_point(
    aes(x = lon, y = lat, size = point_size),
    data = mv_num_collisions,
    color = "red",
    alpha = 0.4
  ) +
  scale_size_identity()
List of 9
$ data :'data.frame': 4 obs. of 2 variables:
..$ lon: num [1:4] -123.8 -67.5 -123.8 -67.5
..$ lat: num [1:4] 11.7 11.7 56.1 56.1
..- attr(*, "out.attrs")=List of 2
.. ..$ dim : Named int [1:2] 2 2
.. .. ..- attr(*, "names")= chr [1:2] "lon" "lat"
.. ..$ dimnames:List of 2
.. .. ..$ lon: chr [1:2] "lon=-123.79395" "lon= -67.54395"
.. .. ..$ lat: chr [1:2] "lat=11.73830" "lat=56.09656"
$ layers :List of 3
..$ :Classes 'LayerInstance', 'Layer', 'ggproto' <ggproto object: Class LayerInstance, Layer>
aes_params: list
compute_aesthetics: function
compute_geom_1: function
compute_geom_2: function
compute_position: function
compute_statistic: function
data: waiver
draw_geom: function
geom: <ggproto object: Class GeomBlank, Geom>
aesthetics: function
default_aes: uneval
draw_group: function
draw_key: function
draw_layer: function
draw_panel: function
extra_params: na.rm
handle_na: function
non_missing_aes:
parameters: function
required_aes:
setup_data: function
use_defaults: function
super: <ggproto object: Class Geom>
geom_params: list
inherit.aes: TRUE
map_statistic: function
mapping: NULL
position: <ggproto object: Class PositionIdentity, Position>
compute_layer: function
compute_panel: function
required_aes:
setup_data: function
setup_params: function
super: <ggproto object: Class Position>
print: function
show.legend: NA
stat: <ggproto object: Class StatIdentity, Stat>
compute_group: function
compute_layer: function
compute_panel: function
default_aes: uneval
extra_params: na.rm
non_missing_aes:
parameters: function
required_aes:
retransform: TRUE
setup_data: function
setup_params: function
super: <ggproto object: Class Stat>
stat_params: list
subset: NULL
super: <ggproto object: Class Layer>
..$ :Classes 'LayerInstance', 'Layer', 'ggproto' <ggproto object: Class LayerInstance, Layer>
aes_params: list
compute_aesthetics: function
compute_geom_1: function
compute_geom_2: function
compute_position: function
compute_statistic: function
data: waiver
draw_geom: function
geom: <ggproto object: Class GeomRasterAnn, Geom>
aesthetics: function
default_aes: uneval
draw_group: function
draw_key: function
draw_layer: function
draw_panel: function
extra_params:
handle_na: function
non_missing_aes:
parameters: function
required_aes:
setup_data: function
use_defaults: function
super: <ggproto object: Class Geom>
geom_params: list
inherit.aes: TRUE
map_statistic: function
mapping: NULL
position: <ggproto object: Class PositionIdentity, Position>
compute_layer: function
compute_panel: function
required_aes:
setup_data: function
setup_params: function
super: <ggproto object: Class Position>
print: function
show.legend: NA
stat: <ggproto object: Class StatIdentity, Stat>
compute_group: function
compute_layer: function
compute_panel: function
default_aes: uneval
extra_params: na.rm
non_missing_aes:
parameters: function
required_aes:
retransform: TRUE
setup_data: function
setup_params: function
super: <ggproto object: Class Stat>
stat_params: list
subset: NULL
super: <ggproto object: Class Layer>
..$ :Classes 'LayerInstance', 'Layer', 'ggproto' <ggproto object: Class LayerInstance, Layer>
aes_params: list
compute_aesthetics: function
compute_geom_1: function
compute_geom_2: function
compute_position: function
compute_statistic: function
data: data.frame
draw_geom: function
geom: <ggproto object: Class GeomRect, Geom>
aesthetics: function
default_aes: uneval
draw_group: function
draw_key: function
draw_layer: function
draw_panel: function
extra_params: na.rm
handle_na: function
non_missing_aes:
parameters: function
required_aes: xmin xmax ymin ymax
setup_data: function
use_defaults: function
super: <ggproto object: Class Geom>
geom_params: list
inherit.aes: FALSE
map_statistic: function
mapping: uneval
position: <ggproto object: Class PositionIdentity, Position>
compute_layer: function
compute_panel: function
required_aes:
setup_data: function
setup_params: function
super: <ggproto object: Class Position>
print: function
show.legend: FALSE
stat: <ggproto object: Class StatIdentity, Stat>
compute_group: function
compute_layer: function
compute_panel: function
default_aes: uneval
extra_params: na.rm
non_missing_aes:
parameters: function
required_aes:
retransform: TRUE
setup_data: function
setup_params: function
super: <ggproto object: Class Stat>
stat_params: list
subset: NULL
super: <ggproto object: Class Layer>
$ scales :Classes 'ScalesList', 'ggproto' <ggproto object: Class ScalesList>
add: function
clone: function
find: function
get_scales: function
has_scale: function
input: function
n: function
non_position_scales: function
scales: list
super: <ggproto object: Class ScalesList>
$ mapping :List of 2
..$ x: symbol lon
..$ y: symbol lat
$ theme : list()
$ coordinates:Classes 'CoordMap', 'Coord', 'ggproto' <ggproto object: Class CoordMap, Coord>
aspect: function
distance: function
is_linear: function
labels: function
limits: list
orientation: NULL
params: list
projection: mercator
range: function
render_axis_h: function
render_axis_v: function
render_bg: function
render_fg: function
train: function
transform: function
super: <ggproto object: Class CoordMap, Coord>
$ facet :List of 1
..$ shrink: logi TRUE
..- attr(*, "class")= chr [1:2] "null" "facet"
$ plot_env :<environment: 0x000000001fd04120>
$ labels :List of 6
..$ x : chr "lon"
..$ y : chr "lat"
..$ xmin: chr "xmin"
..$ xmax: chr "xmax"
..$ ymin: chr "ymin"
..$ ymax: chr "ymax"
- attr(*, "class")= chr [1:2] "gg" "ggplot"
> scale_size_continuous(range=range(mv_num_collisions$collisions))
<ScaleContinuous>
Range:
Limits: 0 -- 1
> #
> USAMap +
+ geom_point(aes(x=lon, y=lat), data=mv_num_collisions, col="orange", alpha=0.4) +
+ scale_size_continuous(range=range(mv_num_collisions$collisions))
Error in layer(data = data, mapping = mapping, stat = stat, geom = GeomPoint, :
object 'circle_scale_amt' not found
>
Wednesday, 13 January 2016
Initial Analysis of Tweets - State of the Union Address by President Barack Obama
https://twitter.com/search?src=typd&q=%23SOTU
Initial analysis of the tweets around the State of the Union Address by President Barack Obama.
A detailed analysis will follow soon.
Some of the Tweets which have been mined can be seen below -
Initial Analysis of the Tweets around the State of the Union Address by President Barack Obama
a detailed analysis shall follow soon ....
Some of the Tweets which have been mined can be seen below -
tweet \ 0 RT @bungdan: @KevinMKruse @dandrezner @CombatC... 1 RT @MatthewACherry: At least now we know who t... 2 RT @Debi129: We pledge to make sure no man bor... 3 @NigeriaNewsdesk @todayng Obama has better thi... 4 Obama sold missiles to Cuba that can do harm t... 5 @BCfromBlo And this is Paul Ryan's characteriz... 6 RT @LorettaLynch: Great to talk #CriminalJusti... 7 @GovHaleySC Seems like the "siren" is you Hale... 8 RT @whitehouseostp: Liked #SOTU? Then tune in ... 9 RT @_bateeeeeey: #SOTU Paul Ryan looking like ... 10 #3: 31ピース クムクムパズル 映画妖怪ウォッチ エンマ大王と5つの物語だニャン! U... 11 RT @ForestWhitaker: I share @POTUS's optimism ... 12 Lol nice https://t.co/32j8DE4rii 13 RT @ambivertido: Obama en su primer y último D...
.............
21 @nikkihaley have you seen what's happening to ... 22 #2PeasInAPod @HillaryClinton will import milli... 23 RT @Ronaldodom22: I am leaving an #EmptySeat f... 24 RT @AshRenee7745: Without watching the #SOTU I... 25 https://t.co/i3G1960sWl vía @Etsy #usa #uk #ca... 26 RT @JaySekulow: #Iran captures our soldiers &a... 27 RT @RedComunismo: Barack Obama, ganador del No... 28 RT @SkyNewsBreak: U.S. military says 10 sailor... 29 RT @Sultanknish: Nikki Haley Didn't Have a Pro... ... ... 6180 President Obama's final State of the Union add... 6181 RT @CSDHI: .@GerardLAUTON 3 exécutions par jou... 6182 Bruselas estudiará en los próximos meses la op... 6183 RT @cjwerleman: Are we really going to lecture... 6184 RT @lara_legunda: Creo que mi mama usa más el ... 6185 Trump vs. Cruz https://t.co/o0Cwqobi1q @youths... 6186 OIL FOR THE LAMPS OF CHINA PAT O'BRIEN JEAN ...
AWS.Amazon.com RStudio server on UBUNTU 14.04 #rstats
AWS.Amazon.com RStudio server on UBUNTU 14.04 #rstats
|
|
Monday, 11 January 2016
Twitter API - LIVE Stream Mining PART - II
#Import the necessary methods from tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
# User credentials for the Twitter API (redacted placeholders).
# Substitute your own values before running; never publish real keys.
access_token = '7###############d'
# The closing quote was missing here, which made the whole script a SyntaxError.
access_token_secret = 'A#############'
consumer_key = 'Q###########1'
consumer_secret = 'S##################x'
# This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):
    """Stream listener that echoes every raw message and every error status."""

    def on_data(self, data):
        """Print the raw message payload and keep the stream open."""
        # print() with a single argument behaves the same on Python 2 and 3.
        print(data)
        return True

    def on_error(self, status):
        """Print the error status; disconnect when rate limited."""
        print(status)
        if status == 420:
            # 420 means the client is being rate limited. Returning False
            # tells tweepy to disconnect instead of reconnecting in a loop.
            return False
        return True
if __name__ == '__main__':
    # This handles Twitter authentication and the connection to Twitter Streaming API
    l = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, l)
    # Key word - OBAMA ....
    # filter() blocks and hands every received message to the listener's
    # on_data(); that is where tweets get printed. The former
    # "for tweet in stream" loop was dropped: a Stream object is not
    # iterable, so the loop could only raise once filter() returned.
    stream.filter(track=['OBAMA '])
#Data mining of tweets which mention OBAMA. Only one tweet's JSON dump is shown below; the rest are truncated.
{"created_at":"Mon Jan 11 21:36:39 +0000 2016","id":686663033926594562,"id_str":"686663033926594562","text":"@McCormackJohn @JonathanLanday Will be awkward when they get up to clap when Obama talks about helping poor, climate change, Iran deal, etc.","source":"\u003ca href=\"http:\/\/twitter.com\" rel=\"nofollow\"\u003eTwitter Web Client\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":686662207858438145,"in_reply_to_status_id_str":"686662207858438145","in_reply_to_user_id":27689907,"in_reply_to_user_id_str":"27689907","in_reply_to_screen_name":"McCormackJohn","user":{"id":2208906530,"id_str":"2208906530","name":"Ali Ahmadi","screen_name":"AliR_Ahmadi","location":"Tehran-NYC","url":null,"description":"Researcher and writer focusing on Iran, the Middle East and sources of US foreign policy towards the region. Personal account.","protected":false,"verified":false,"followers_count":878,"friends_count":739,"listed_count":38,"favourites_count":1374,"statuses_count":9729,"created_at":"Fri Nov 22 12:41:04 +0000 2013","utc_offset":-28800,"time_zone":"Pacific Time (US & 
Canada)","geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"0084B4","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/573155028506181632\/gV1_lfDs_normal.jpeg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/573155028506181632\/gV1_lfDs_normal.jpeg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/2208906530\/1428808169","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[],"user_mentions":[{"screen_name":"McCormackJohn","name":"John McCormack","id":27689907,"id_str":"27689907","indices":[0,14]},{"screen_name":"JonathanLanday","name":"Jonathan Landay","id":47408060,"id_str":"47408060","indices":[15,30]}],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"en","timestamp_ms":"1452548199527"}
In [3]:#Import the necessary methods from tweepy library from tweepy.streaming import StreamListener from tweepy import OAuthHandler from tweepy import Stream import json import pandas as pd %matplotlib inline import matplotlib.pyplot as plt import matplotlib matplotlib.style.use('ggplot') #Variables that contains the user credentials to access Twitter API access_token = '7###########' access_token_secret = 'A############# consumer_key = 'Q########### consumer_secret = 'SO##################In [4]:tweets_data_path = 'C:/...../Twitter1/_US_Pres.txt' tweets_data = [] tweets_file = open(tweets_data_path, "r") for line in tweets_file: try: tweet = json.loads(line) tweets_data.append(tweet) except: continueIn [5]:print len(tweets_data)1286In [6]:tweets = pd.DataFrame() tweets['tweet'] = map(lambda tweet: tweet['text'] if 'text' in tweet else None, tweets_data) tweets['created_at'] = map(lambda tweet: tweet['created_at'] if 'created_at' in tweet else None, tweets_data) tweets['user_id'] = map(lambda tweet: tweet['user']['id'] if 'user' in tweet else None, tweets_data) tweets['id_str'] = map(lambda tweet: tweet['user']['id_str'] if 'user' in tweet else None, tweets_data) tweets['username'] = map(lambda tweet: tweet['user']['name'] if 'user' in tweet else None, tweets_data) tweets['screen_name'] = map(lambda tweet: tweet['user']['screen_name'] if 'user' in tweet else None, tweets_data) tweets['location'] = map(lambda tweet: tweet['user']['location'] if 'user' in tweet else None, tweets_data) tweets['followers_count'] = map(lambda tweet: tweet['user']['followers_count'] if 'user' in tweet else None, tweets_data) tweets['friends_count'] = map(lambda tweet: tweet['user']['friends_count'] if 'user' in tweet else None, tweets_data) tweets['created_at'] = map(lambda tweet: tweet['user']['created_at'] if 'user' in tweet else None, tweets_data) tweets['user_lang'] = map(lambda tweet: tweet['user']['lang'] if 'user' in tweet else None, tweets_data) tweets['following'] = map(lambda 
tweet: tweet['user']['following'] if 'user' in tweet else None, tweets_data) tweets['geo'] = map(lambda tweet: tweet['geo'], tweets_data) # tweets['coordinates'] = map(lambda tweet: tweet['coordinates'], tweets_data) tweets['retweet_count'] = map(lambda tweet: tweet['retweet_count'], tweets_data) tweets['favorite_count'] = map(lambda tweet: tweet['favorite_count'], tweets_data) tweets['lang'] = map(lambda tweet: tweet['lang'], tweets_data) # if k1 in d and k2 in d[k1] tweets['country'] = map(lambda tweet: tweet['place']['country'] if 'country' in tweet else None, tweets_data) tweets['country'] = map(lambda tweet: tweet['place']['country'] if tweet['place'] != None else None, tweets_data) print (tweets)tweet \ 0 @McCormackJohn @JonathanLanday Will be awkward... 1 RT @trscoop: Ted Cruz: Unlike Obama, I don’t i... 2 @2ANow Well now, you've hit on the question Ob... 3 STATE OF THE LEGACY: Obama 'will be talking ab... 4 RT @KomaKazii: idc what nobody say MCK is trul... 5 RT @swornfavoritee: RT https://t.co/V84oCANXej 6 RT @PatriotGinger: MT @LibertyUSA1776: Let Oba... 7 RT @TalKopan: Paul Ryan says Nikki Haley has a... 8 RT @Drudge_Report_: #STATE OF #UNION: #Obama t... 9 RT @nytimes: President Obama’s guests at his S... 10 RT @SenateDems: At State of the Union, Michell... 11 Barack Obama prepara su último discurso sobre ... 12 RT @IndyDayspring: @PuritanBaptist https://t.... 13 Obama’s SOTU guest list includes gay rights a... 14 RT @jacobperry: "I hate you guys and want to d... 15 RT @gcamp4: Get Obama impeached contact your C... 16 RT @KonniBurton: #birthnuts indeed! MAN, we ne... 17 Fox's Wallace Falsely Claims Obama's Executive... 18 RT @PoliceFirstNow: BREAKING: Top US Admiral F... 19 RT @JudgeMoroz: Philly's mayor & Obama are... 20 rt https://t.co/eOpPRMYIJ6 21 RT @nytopinion: So what should we say about th... 22 Obama's closing it with Exec. Order end run ar... 23 He can suck a dick https://t.co/crPnkXeb0Y 24 RT @peddoc63: Obama only wants Guns to keep HI... 
25 RT @NaughtyBeyotch: Obama Hiding Why Muslim Re... 26 New: Obama to make good on Guantanamo pledge: ... 27 RT @Lawsonbulk: GOP Rep: Gun Reform An Obama P... 28 Obama goes it alone in his last State of the U... 29 RT @mmorera79: Madre mía los controles Pa entr... ... ... 1256 RT @alvaroforero: Oposición decía: políticas d... 1257 RT @nytimes: Bernie Sanders on the "recklessne... 1258 Facts don’t lie but these meme sure does. Comp... 1259 #Lercio USA, Obama annuncia stretta sulla vend... 1260 Piden fin de redadas migratorias a Obama -->... 1261 RT @trscoop: Ted Cruz: Unlike Obama, I don’t i... 1262 RT @Ricky_Vaughn99: I will never understand wh... 1263 RT @rockenschtroodl: Hillary simply has no ans... 1264 RT @nowthisnews: Obama's executive action on g... 1265 Remember that “Mmm mmm mmm Barack Hussein Obam... 1266 @oefoif0506 could be lots more Obama is plann... 1267 Obama False Gun Control Exposed by Tireo serio... 1268 RT @adamjohnsonNYC: Watch the last 4 president... 1269 RT @Adel__Almalki: #news by #almalki: Obama to... 1270 Obama takes this year's State of Union to YouT... 1271 RT @pourmecoffee: @pourmecoffee Obama's Hip-Ho... 1272 RT @WSJ: Hillary Clinton proposes 4% income-ta... 1273 RT @Ryanamber711: Horror. Citizen Journalism ... 1274 YahooNews: Democratic presidential candidate H... 1275 RT @TehachapiHomes: Guantánamo Detainee Freed ... 1276 Hillary Clinton tiene el apoyo de la excongres... 1277 RT @ZaidJilani: Bernies support from latinos i... 1278 8 Unforgettable Moments From President Obama’s... 1279 RT @franktorresnet: RPOF says probe expansion ... 1280 RT @JohnJHarwood: BREAKING: in Iowa, Hillary C... 1281 RT @ColMorrisDavis: If U.S. courts and prisons... 1282 RT @MilitaryTimes: Obama taps Army vet for DoD... 1283 RT @Cap_Institute: BREAKING: Leaked Emails Rev... 1284 I love how GOP obstructionists get a complete ... 1285 #America @DineshDSouza warned us abt Obama yrs... 
created_at user_id id_str \ 0 Fri Nov 22 12:41:04 +0000 2013 2208906530 2208906530 1 Tue Oct 05 20:17:50 +0000 2010 199010943 199010943 2 Sat Nov 23 13:20:52 +0000 2013 2210672052 2210672052 3 Thu Oct 30 16:44:56 +0000 2014 2851626833 2851626833 4 Tue Sep 02 21:26:37 +0000 2014 2786720672 2786720672 5 Wed Oct 09 01:11:51 +0000 2013 1948239516 1948239516 6 Wed Oct 29 22:05:34 +0000 2008 17053388 17053388 7 Sat Oct 04 20:12:31 +0000 2008 16594804 16594804 8 Mon Jan 11 19:02:05 +0000 2016 4776816573 4776816573 9 Fri Nov 05 12:45:01 +0000 2010 212207495 212207495 10 Mon Nov 23 06:00:44 +0000 2015 4330934120 4330934120 11 Sat May 08 15:38:56 +0000 2010 141627342 141627342 12 Sun Feb 03 01:57:11 +0000 2013 1143919482 1143919482 13 Sat Apr 04 16:19:15 +0000 2015 3139235347 3139235347 14 Thu Apr 16 17:13:12 +0000 2009 31993522 31993522 15 Wed Jun 17 13:48:20 +0000 2015 3331284363 3331284363 16 Mon Apr 23 01:47:15 +0000 2012 560828139 560828139 17 Wed Oct 06 18:56:00 +0000 2010 199388764 199388764 18 Sat Sep 12 19:39:13 +0000 2009 73717434 73717434 19 Tue Dec 23 10:42:05 +0000 2014 2938154393 2938154393 20 Thu Dec 25 03:11:12 +0000 2014 2940242871 2940242871 21 Sat Nov 21 17:22:04 +0000 2015 4243588577 4243588577 22 Tue Mar 23 23:35:37 +0000 2010 125813890 125813890 23 Sat Jun 29 03:09:39 +0000 2013 1554587022 1554587022 24 Wed Apr 28 08:33:12 +0000 2010 137976256 137976256 25 Mon Mar 19 19:37:57 +0000 2012 529662586 529662586 26 Sun Jan 18 02:35:30 +0000 2015 2983431473 2983431473 27 Tue Mar 15 16:59:07 +0000 2011 266690646 266690646 28 Sun Jul 26 14:50:31 +0000 2015 3295404680 3295404680 29 Sun Mar 09 21:23:59 +0000 2014 2397833950 2397833950 ... ... ... ... 
1256 Mon Apr 05 03:09:54 +0000 2010 129683990 129683990 1257 Wed Apr 15 15:55:37 +0000 2009 31439632 31439632 1258 Mon Jun 02 16:11:52 +0000 2014 2541779029 2541779029 1259 Mon Oct 06 06:30:52 +0000 2014 2809655590 2809655590 1260 Sun Jun 30 04:23:09 +0000 2013 1557207091 1557207091 1261 Mon Jun 11 13:51:25 +0000 2012 605517957 605517957 1262 Thu Sep 15 05:25:59 +0000 2011 373783051 373783051 1263 Wed May 06 01:37:30 +0000 2015 3237600915 3237600915 1264 Thu Jul 21 02:56:41 +0000 2011 339430631 339430631 1265 Sat Aug 22 02:28:23 +0000 2009 67790595 67790595 1266 Wed May 13 03:57:17 +0000 2015 3193836186 3193836186 1267 Sat Apr 27 02:32:15 +0000 2013 1383376687 1383376687 1268 Thu Jun 18 18:25:24 +0000 2009 48439918 48439918 1269 Mon Jan 11 22:07:50 +0000 2016 4778173235 4778173235 1270 Fri Aug 17 22:20:09 +0000 2012 764552694 764552694 1271 Tue Jan 15 01:54:44 +0000 2013 1090750657 1090750657 1272 Mon Aug 06 04:08:39 +0000 2012 739815102 739815102 1273 Wed Jan 08 10:31:19 +0000 2014 2281953847 2281953847 1274 Sun Sep 20 09:19:35 +0000 2015 3625402333 3625402333 1275 Fri Sep 19 21:50:28 +0000 2014 2820491088 2820491088 1276 Sun Jan 30 10:10:04 +0000 2011 244876374 244876374 1277 Mon Aug 27 20:56:44 +0000 2012 785460690 785460690 1278 Sat Sep 27 14:11:48 +0000 2008 16485764 16485764 1279 Mon May 14 12:59:01 +0000 2012 579858097 579858097 1280 Tue Jan 22 20:02:08 +0000 2013 1112486646 1112486646 1281 Sat Jul 18 20:15:17 +0000 2009 58021061 58021061 1282 Wed Jul 25 06:03:42 +0000 2012 715571516 715571516 1283 Fri Mar 16 12:30:29 +0000 2012 526346146 526346146 1284 Mon Mar 12 03:44:53 +0000 2007 980611 980611 1285 Tue Nov 06 15:37:42 +0000 2012 929963226 929963226 username screen_name location \ 0 Ali Ahmadi AliR_Ahmadi Tehran-NYC 1 real me 73101mtp None 2 Linda Osheroff Oshcoy None 3 Drudge Report Feed drudgereported None 4 march23ho☺️✨ _BEaUbree @ the doe waiting on March☺️✨ 5 irrelevant _shutupjai None 6 Connie cabootee North East 7 Preston Grisham prestongrisham 
Washington, DC 8 Susan Thomasа ebejekifabox None 9 Biljana B Milenkovic bibacus Washington DC 10 Victor Acuna VictorAcuna8 None 11 tulio gómez tulio1987 GUÁRICO VENEZUELA. 12 The Puritan Baptist PuritanBaptist Trafalgar, Indiana 13 Robert holgate 415holgate None 14 maureen grandmapurse Seattle 15 CoolHand7a coolhand7a Land of the Free! 16 ctl jetx86 None 17 Rickster Rickster Rickstersays None 18 ReFounderParty ReFounderParty USA 19 Carole Curtis 2013_carole None 20 . itstrainell Gonzales, LA 21 venr Venr95 issaquah , Washington 22 Donna rustythimble54 None 23 ㅤㅤㅤㅤㅤㅤㅤㅤㅤㅤㅤㅤ 1umanta None 24 CrAiG LaRkIn KB2FED kb2fed Rome NY FN23gf 25 Jeff V Jeffsright Iowa-Fly over country 26 Jon Stall jon4stall None 27 Jane Jane_WI Fitzwalkerstan 28 Jim Ertel ertel_jim Canandaigua, NY 29 Andres canadillandres None ... ... ... ... 1256 juana uribe juanauribep Bogota 1257 angie phelps skyoversc None 1258 Kansas Liberal KsLiberal None 1259 Il Boccalone avolteabboccano None 1260 Ecuador en Directo EcuadorDirecto Guayaquil, Ecuador 1261 Doug Dennison DougDennison1 None 1262 AdolfJoeBiden™ Bidenshairplugs USA 1263 Annie's Mom AppleAnnie_2 Texas, USA 1264 Hanna Bagheri HannaBags Univ. 
of North Texas 1265 Neil Stevens presjpolk Northern Virginia 1266 Clark Williams Knight276 United States 1267 Xristos Xristos585 None 1268 Capitol24 randomlyCapitol London, England 1269 إيهم الغامدي، ejxnukgfxavk None 1270 Jared Johnson jaredmj106 Jacksonville, NC 1271 Gary McGraw cigitalgem 'merica 1272 Leigh brorichysistrly None 1273 CDHVDS CDHVDS Nederland 1274 Serge Poznanski poznanski_serge None 1275 Terris Anne Ladd tladd58 Mayfield, Ky 1276 Blog de Juan Pardo JuanPardoZurg Almería (España) 1277 Delendarius Delendarius None 1278 Wavpin WavPin The World 1279 Justine unSafe FL_narcissist Florida 1280 Lisa Kirby LisaKBromley None 1281 Janet Scott JanetSScott Kansas City, MO 1282 BOYWONDERELROYJETSON thesaddleguy1 Pittsburgh, PA 1283 Terri and Tony TCuccio None 1284 Karoli Karoli West Coast 1285 dawn goodfallow dawngpsalm63 None followers_count friends_count user_lang following geo coordinates \ 0 878 739 en None None None 1 81 55 en None None None 2 199 338 en None None None 3 26 43 en None None None 4 588 296 en None None None 5 894 676 en None None None 6 100 86 en None None None 7 1668 1595 en None None None 8 0 15 en None None None 9 414 612 en None None None 10 224 156 en None None None 11 1237 2998 es None None None 12 59 40 en None None None 13 1247 2097 en None None None 14 277 158 en None None None 15 172 29 en None None None 16 4438 4343 en None None None 17 865 786 en None None None 18 2852 3120 en None None None 19 133 95 en None None None 20 512 338 en None None None 21 139 30 ar None None None 22 4399 4312 en None None None 23 7273 5472 en None None None 24 1566 3088 en None None None 25 3874 4133 en None None None 26 35 168 en None None None 27 3960 3804 en None None None 28 6 34 en None None None 29 32 151 es None None None ... ... ... ... ... ... ... 
1256 29426 777 es None None None 1257 325 1177 en None None None 1258 205 90 en None None None 1259 11 4 it None None None 1260 460 189 es None None None 1261 71 87 en None None None 1262 31212 13293 en None None None 1263 189 131 en None None None 1264 519 415 en None None None 1265 2292 446 en None None None 1266 1191 601 en None None None 1267 64 394 en None None None 1268 7473 7188 en None None None 1269 1 129 en None None None 1270 5289 5708 en None None None 1271 2236 19 en None None None 1272 109 138 en None None None 1273 251 710 nl None None None 1274 434 3558 fr None None None 1275 1980 2287 en None None None 1276 3308 3849 es None None None 1277 183 142 en None None None 1278 1815 1978 en None None None 1279 1129 1860 en None None None 1280 1648 2172 en None None None 1281 64 266 en None None None 1282 950 5001 en None None None 1283 206 183 en None None None 1284 16948 3699 en None None None 1285 1076 1873 en None None None retweet_count favorite_count lang country 0 0 0 en None 1 0 0 en None 2 0 0 en None 3 0 0 en None 4 0 0 en None 5 0 0 und None 6 0 0 en None 7 0 0 en None 8 0 0 en None 9 0 0 en None 10 0 0 en None 11 0 0 es None 12 0 0 und None 13 0 0 en None 14 0 0 en None 15 0 0 en None 16 0 0 en None 17 0 0 en None 18 0 0 en None 19 0 0 en None 20 0 0 und None 21 0 0 en None 22 0 0 en None 23 0 0 en None 24 0 0 en None 25 0 0 en None 26 0 0 en None 27 0 0 en None 28 0 0 en None 29 0 0 es None ... ... ... ... ... 
1256 0 0 es None 1257 0 0 en None 1258 0 0 en None 1259 0 0 it None 1260 0 0 es None 1261 0 0 en None 1262 0 0 en None 1263 0 0 en None 1264 0 0 en None 1265 0 0 en None 1266 0 0 en None 1267 0 0 en None 1268 0 0 en None 1269 0 0 en None 1270 0 0 en None 1271 0 0 en None 1272 0 0 en None 1273 0 0 en None 1274 0 0 en None 1275 0 0 en None 1276 0 0 es España 1277 0 0 en None 1278 0 0 en None 1279 0 0 en None 1280 0 0 en None 1281 0 0 en None 1282 0 0 en None 1283 0 0 en None 1284 0 0 en None 1285 0 0 en None [1286 rows x 17 columns]In [7]:tweets_by_loc = tweets['location'].value_counts() print len(tweets_by_loc)597In [8]:tweets_by_loc = tweets['location'].value_counts() # determines most frequent tweet Locations print tweets_by_loc # prints out Locations and how many tweets came from each Location - in descending orderUSA 22 United States 10 Florida 7 New Jersey, USA 7 North Carolina 6 Los Angeles 6 Texas, USA 6 The World 6 Houston, TX 5 Atlanta, GA 5 New York, NY 5 Washington DC 5 New York 5 California, USA 5 London 4 Washington, DC 4 Florida, USA 4 Brooklyn, NY 4 Texas 4 World Wide Web 3 ST. LOUIS 3 Va. Beach,Va. 3 Republic of Texas 3 Illinois 3 New London 1775 3 U.S.A. 3 O-H-I-O 3 Pullman, WA 3 ENGLAND 3 Canada 3 .. ny 1 Alexandria, VA 1 Danmark og verden 1 The Mid-West 1 Arizona, USA 1 Clarksville,Tn 1 Alabama Georgia Line 1 Ithilien 1 The Hills of Los Angeles 1 Jalapa 1 Hamburg 1 Germany 1 texas 1 Mansfield, TX 1 #FeelTheBern SF Bay Area 1 I am not affiliated w/pk in CO 1 São Paulo, Brasil 1 Miami 1 Comarca Bolsón 1 Defiance,PA 1 USA 1 Fermo 1 #BDS is a moral imperative 1 new york 1 New Zealand 1 Hampton Roads, Va 1 Trafalgar, Indiana 1 London, England 1 Marietta, Georgia 1 issaquah , Washington 1 Name: location, dtype: int64In [9]:tweets_by_lang = tweets['user_lang'].value_counts() # determines most frequent tweet countries print tweets_by_lang # prints Tweet Freq. 
# in each used Language
# Output: en 1107 es 107 fr 15 en-gb 10 it 7 de 6 ja 5 ru 4 ar 3 pt 3 nl 3 tr 2 sv 2 da 2 th 2 en-GB 2 uk 1 es-MX 1 pl 1 fi 1 he 1 el 1 Name: user_lang, dtype: int64

def _plot_top_counts(counts, top_n, xlabel, ylabel, title):
    """Draw the first top_n entries of a value_counts() Series as black bars.

    Returns the matplotlib axes produced by Series.plot(), so a notebook
    cell ending in this call still displays it as its Out[] value.
    """
    fig, ax = plt.subplots()
    ax.tick_params(axis='x', labelsize=15)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_xlabel(xlabel, fontsize=15)
    ax.set_ylabel(ylabel, fontsize=15)
    ax.set_title(title, fontsize=25, fontweight='bold', color='black')
    return counts[:top_n].plot(ax=ax, kind='bar', color='black')


# In [11]:
tweets_by_lang = tweets['user_lang'].value_counts()
_plot_top_counts(tweets_by_lang, 5, 'User_lang', 'Number of tweets', 'Top 5 user_lang')
#
# Top Five user Languages are - English - Espanol , French , En-Gb ? and Presumably Italian .
#
# Out[11]: <matplotlib.axes._subplots.AxesSubplot at 0xfb52438>

# In [12]:
tweets_by_loc = tweets['location'].value_counts()
_plot_top_counts(tweets_by_loc, 25, 'Locations', 'Number of tweets', 'Top 25 Locations')
# Out[12]: <matplotlib.axes._subplots.AxesSubplot at 0xf8ce208>

# In [13]:
# Need to re-check, something seems wrong here: value_counts() tallies how
# many accounts share each follower count, so this chart shows the 15 most
# common follower-count values, not the 15 accounts with the most followers.
# NOTE(review): if the latter was intended, something like
# tweets.nlargest(15, 'followers_count') is probably what is wanted.
tweets_by_followers_count = tweets['followers_count'].value_counts()
_plot_top_counts(tweets_by_followers_count, 15, 'followers_count', ' Count', 'Top 15 followers_count')
# Out[13]: <matplotlib.axes._subplots.AxesSubplot at 0x1040be80>

# In [14]:
import re

# In [16]:
def word_in_text(word, text):
    """Return True when word occurs in text, ignoring case.

    word is used as a regular-expression pattern, not a literal string.
    """
    return re.search(word.lower(), text.lower()) is not None


def extract_link(text):
    """Return the first http(s):// or www. link found in text, or ''."""
    regex = r'https?://[^\s<>"]+|www\.[^\s<>"]+'
    match = re.search(regex, text)
    if match:
        return match.group()
    return ''


if __name__ == '__main__':
    tweets_data_path = 'C:/..../Twitter1/_US_Pres.txt'
    tweets_data = []
    # "with" closes the file even if reading fails part-way through.
    with open(tweets_data_path, "r") as tweets_file:
        for line in tweets_file:
            try:
                tweets_data.append(json.loads(line))
            except ValueError:
                # Skip lines that are not valid JSON.
                continue

    tweets = pd.DataFrame()
    texts = []
    langs = []
    countries = []
    for line, tweet in enumerate(tweets_data):
        #print line, tweet
        try:
            texts.append(tweet['text'])
            # langs.append(tweet['lang'])
            # countries.append(tweet['place']['country'] if tweet['place'] != None else None)
        except KeyError:
            # Stream messages without a 'text' field are reported and skipped.
            print("Error line %d" % (line))
    tweets['text'] = texts
    # tweets['lang'] = langs
    # tweets['country'] = countries

    #Mining
    for keyword in ('python', 'javascript', 'ruby', 'programming', 'tutorial'):
        tweets[keyword] = tweets['text'].apply(
            lambda tweet, keyword=keyword: word_in_text(keyword, tweet))
    tweets['relevant'] = tweets['text'].apply(
        lambda tweet: word_in_text('programming', tweet) or word_in_text('tutorial', tweet))
    tweets['link'] = tweets['text'].apply(lambda tweet: extract_link(tweet))

    tweets_relevant = tweets[tweets['relevant'] == True]
    tweets_relevant_with_link = tweets_relevant[tweets_relevant['link'] != '']
    print(tweets_relevant_with_link[tweets_relevant_with_link['python'] == True]['link'])
    print(tweets_relevant_with_link[tweets_relevant_with_link['javascript'] == True]['link'])
    print(tweets_relevant_with_link[tweets_relevant_with_link['ruby'] == True]['link'])
    plt.show()
# Output: Series([], Name: link, dtype: object) Series([], Name: link, dtype: object) Series([], Name: link, dtype: object)

# In [17]:
import tweepy
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
# Collect the ids of every account followed by DhankarRohit, page by page.
# The original loop also built an api.followers_ids cursor on each pass and
# read its next_cursor, but never used either value; that dead code is gone.
ids = []
for page in tweepy.Cursor(api.friends_ids, screen_name="DhankarRohit").pages():
    ids.extend(page)
    # print page
    # print ids
print(len(ids))
# Output: 1686
# In [ ]:
Subscribe to:
Posts (Atom)