Monday, 11 January 2016

Twitter API - LIVE Stream Mining PART - II

#Import the necessary methods from tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream

#Variables that contains the user credentials to access Twitter API 
access_token = '7###############d'
access_token_secret = 'A#############
consumer_key = 'Q###########1'
consumer_secret = 'S##################x'


#This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):

    def on_data(self, data):
        print data
        return True

    def on_error(self, status):
        print status


if __name__ == '__main__':

    #This handles Twitter authentication and the connection to Twitter Streaming API
    l = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, l)

    #Key word - OBAMA ....
    stream.filter(track=['OBAMA '])
    for tweet in stream:
        print tweet['text']
    #DATA Mining Tweets which mention - OBAMA .....only 1 tweets JSON dump shown rest all truncated .............


{"created_at":"Mon Jan 11 21:36:39 +0000 2016","id":686663033926594562,"id_str":"686663033926594562","text":"@McCormackJohn @JonathanLanday Will be awkward when they get up to clap when Obama talks about helping poor, climate change, Iran deal, etc.","source":"\u003ca href=\"http:\/\/twitter.com\" rel=\"nofollow\"\u003eTwitter Web Client\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":686662207858438145,"in_reply_to_status_id_str":"686662207858438145","in_reply_to_user_id":27689907,"in_reply_to_user_id_str":"27689907","in_reply_to_screen_name":"McCormackJohn","user":{"id":2208906530,"id_str":"2208906530","name":"Ali Ahmadi","screen_name":"AliR_Ahmadi","location":"Tehran-NYC","url":null,"description":"Researcher and writer focusing on Iran, the Middle East and sources of US foreign policy towards the region. Personal account.","protected":false,"verified":false,"followers_count":878,"friends_count":739,"listed_count":38,"favourites_count":1374,"statuses_count":9729,"created_at":"Fri Nov 22 12:41:04 +0000 2013","utc_offset":-28800,"time_zone":"Pacific Time (US & Canada)","geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"0084B4","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/573155028506181632\/gV1_lfDs_normal.jpeg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/573155028506181632\/gV1_lfDs_normal.jpeg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/2208906530\/1428808169","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[],"user_mentions":[{"screen_name":"McCormackJohn","name":"John McCormack","id":27689907,"id_str":"27689907","indices":[0,14]},{"screen_name":"JonathanLanday","name":"Jonathan Landay","id":47408060,"id_str":"47408060","indices":[15,30]}],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"en","timestamp_ms":"1452548199527"}




In [3]:
#Import the necessary methods from tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream

import json
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')

#Variables that contains the user credentials to access Twitter API 
access_token = '7###########'
access_token_secret = 'A#############
consumer_key = 'Q###########
consumer_secret = 'SO##################
In [4]:
tweets_data_path = 'C:/...../Twitter1/_US_Pres.txt'

tweets_data = []
tweets_file = open(tweets_data_path, "r")
for line in tweets_file:
    try:
        tweet = json.loads(line)
        tweets_data.append(tweet)
    except:
        continue
In [5]:
print len(tweets_data)
1286
In [6]:
tweets = pd.DataFrame()
tweets['tweet'] = map(lambda tweet: tweet['text'] if 'text' in tweet else None, tweets_data)
tweets['created_at'] = map(lambda tweet: tweet['created_at'] if 'created_at' in tweet else None, tweets_data)
tweets['user_id'] = map(lambda tweet: tweet['user']['id'] if 'user' in tweet else None, tweets_data)
tweets['id_str'] = map(lambda tweet: tweet['user']['id_str'] if 'user' in tweet else None, tweets_data)
tweets['username'] = map(lambda tweet: tweet['user']['name'] if 'user' in tweet else None, tweets_data)
tweets['screen_name'] = map(lambda tweet: tweet['user']['screen_name'] if 'user' in tweet else None, tweets_data)
tweets['location'] = map(lambda tweet: tweet['user']['location'] if 'user' in tweet else None, tweets_data)
tweets['followers_count'] = map(lambda tweet: tweet['user']['followers_count'] if 'user' in tweet else None, tweets_data)
tweets['friends_count'] = map(lambda tweet: tweet['user']['friends_count'] if 'user' in tweet else None, tweets_data)
tweets['created_at'] = map(lambda tweet: tweet['user']['created_at'] if 'user' in tweet else None, tweets_data)
tweets['user_lang'] = map(lambda tweet: tweet['user']['lang'] if 'user' in tweet else None, tweets_data)
tweets['following'] = map(lambda tweet: tweet['user']['following'] if 'user' in tweet else None, tweets_data)
tweets['geo'] = map(lambda tweet: tweet['geo'], tweets_data)
#
tweets['coordinates'] = map(lambda tweet: tweet['coordinates'], tweets_data)
tweets['retweet_count'] = map(lambda tweet: tweet['retweet_count'], tweets_data)
tweets['favorite_count'] = map(lambda tweet: tweet['favorite_count'], tweets_data)
tweets['lang'] = map(lambda tweet: tweet['lang'], tweets_data)
# if k1 in d and k2 in d[k1]
tweets['country'] = map(lambda tweet: tweet['place']['country'] if 'country' in tweet else None, tweets_data)
tweets['country'] = map(lambda tweet: tweet['place']['country'] if tweet['place'] != None else None, tweets_data)

print (tweets)
                                                  tweet  \
0     @McCormackJohn @JonathanLanday Will be awkward...   
1     RT @trscoop: Ted Cruz: Unlike Obama, I don’t i...   
2     @2ANow Well now, you've hit on the question Ob...   
3     STATE OF THE LEGACY: Obama 'will be talking ab...   
4     RT @KomaKazii: idc what nobody say MCK is trul...   
5        RT @swornfavoritee: RT https://t.co/V84oCANXej   
6     RT @PatriotGinger: MT @LibertyUSA1776: Let Oba...   
7     RT @TalKopan: Paul Ryan says Nikki Haley has a...   
8     RT @Drudge_Report_: #STATE OF #UNION: #Obama t...   
9     RT @nytimes: President Obama’s guests at his S...   
10    RT @SenateDems: At State of the Union, Michell...   
11    Barack Obama prepara su último discurso sobre ...   
12    RT @IndyDayspring: @PuritanBaptist  https://t....   
13    Obama’s SOTU guest list includes  gay rights a...   
14    RT @jacobperry: "I hate you guys and want to d...   
15    RT @gcamp4: Get Obama impeached contact your C...   
16    RT @KonniBurton: #birthnuts indeed! MAN, we ne...   
17    Fox's Wallace Falsely Claims Obama's Executive...   
18    RT @PoliceFirstNow: BREAKING: Top US Admiral F...   
19    RT @JudgeMoroz: Philly's mayor & Obama are...   
20                           rt https://t.co/eOpPRMYIJ6   
21    RT @nytopinion: So what should we say about th...   
22    Obama's closing it with Exec. Order end run ar...   
23          He can suck a dick  https://t.co/crPnkXeb0Y   
24    RT @peddoc63: Obama only wants Guns to keep HI...   
25    RT @NaughtyBeyotch: Obama Hiding Why Muslim Re...   
26    New: Obama to make good on Guantanamo pledge: ...   
27    RT @Lawsonbulk: GOP Rep: Gun Reform An Obama P...   
28    Obama goes it alone in his last State of the U...   
29    RT @mmorera79: Madre mía los controles Pa entr...   
...                                                 ...   
1256  RT @alvaroforero: Oposición decía: políticas d...   
1257  RT @nytimes: Bernie Sanders on the "recklessne...   
1258  Facts don’t lie but these meme sure does. Comp...   
1259  #Lercio USA, Obama annuncia stretta sulla vend...   
1260  Piden fin de redadas migratorias a Obama --&gt...   
1261  RT @trscoop: Ted Cruz: Unlike Obama, I don’t i...   
1262  RT @Ricky_Vaughn99: I will never understand wh...   
1263  RT @rockenschtroodl: Hillary simply has no ans...   
1264  RT @nowthisnews: Obama's executive action on g...   
1265  Remember that “Mmm mmm mmm Barack Hussein Obam...   
1266  @oefoif0506  could be lots more Obama is plann...   
1267  Obama False Gun Control Exposed by Tireo serio...   
1268  RT @adamjohnsonNYC: Watch the last 4 president...   
1269  RT @Adel__Almalki: #news by #almalki: Obama to...   
1270  Obama takes this year's State of Union to YouT...   
1271  RT @pourmecoffee: @pourmecoffee Obama's Hip-Ho...   
1272  RT @WSJ: Hillary Clinton proposes 4% income-ta...   
1273  RT @Ryanamber711: Horror.  Citizen Journalism ...   
1274  YahooNews: Democratic presidential candidate H...   
1275  RT @TehachapiHomes: Guantánamo Detainee Freed ...   
1276  Hillary Clinton tiene el apoyo de la excongres...   
1277  RT @ZaidJilani: Bernies support from latinos i...   
1278  8 Unforgettable Moments From President Obama’s...   
1279  RT @franktorresnet: RPOF says probe expansion ...   
1280  RT @JohnJHarwood: BREAKING: in Iowa, Hillary C...   
1281  RT @ColMorrisDavis: If U.S. courts and prisons...   
1282  RT @MilitaryTimes: Obama taps Army vet for DoD...   
1283  RT @Cap_Institute: BREAKING: Leaked Emails Rev...   
1284  I love how GOP obstructionists get a complete ...   
1285  #America @DineshDSouza warned us abt Obama yrs...   

                          created_at     user_id      id_str  \
0     Fri Nov 22 12:41:04 +0000 2013  2208906530  2208906530   
1     Tue Oct 05 20:17:50 +0000 2010   199010943   199010943   
2     Sat Nov 23 13:20:52 +0000 2013  2210672052  2210672052   
3     Thu Oct 30 16:44:56 +0000 2014  2851626833  2851626833   
4     Tue Sep 02 21:26:37 +0000 2014  2786720672  2786720672   
5     Wed Oct 09 01:11:51 +0000 2013  1948239516  1948239516   
6     Wed Oct 29 22:05:34 +0000 2008    17053388    17053388   
7     Sat Oct 04 20:12:31 +0000 2008    16594804    16594804   
8     Mon Jan 11 19:02:05 +0000 2016  4776816573  4776816573   
9     Fri Nov 05 12:45:01 +0000 2010   212207495   212207495   
10    Mon Nov 23 06:00:44 +0000 2015  4330934120  4330934120   
11    Sat May 08 15:38:56 +0000 2010   141627342   141627342   
12    Sun Feb 03 01:57:11 +0000 2013  1143919482  1143919482   
13    Sat Apr 04 16:19:15 +0000 2015  3139235347  3139235347   
14    Thu Apr 16 17:13:12 +0000 2009    31993522    31993522   
15    Wed Jun 17 13:48:20 +0000 2015  3331284363  3331284363   
16    Mon Apr 23 01:47:15 +0000 2012   560828139   560828139   
17    Wed Oct 06 18:56:00 +0000 2010   199388764   199388764   
18    Sat Sep 12 19:39:13 +0000 2009    73717434    73717434   
19    Tue Dec 23 10:42:05 +0000 2014  2938154393  2938154393   
20    Thu Dec 25 03:11:12 +0000 2014  2940242871  2940242871   
21    Sat Nov 21 17:22:04 +0000 2015  4243588577  4243588577   
22    Tue Mar 23 23:35:37 +0000 2010   125813890   125813890   
23    Sat Jun 29 03:09:39 +0000 2013  1554587022  1554587022   
24    Wed Apr 28 08:33:12 +0000 2010   137976256   137976256   
25    Mon Mar 19 19:37:57 +0000 2012   529662586   529662586   
26    Sun Jan 18 02:35:30 +0000 2015  2983431473  2983431473   
27    Tue Mar 15 16:59:07 +0000 2011   266690646   266690646   
28    Sun Jul 26 14:50:31 +0000 2015  3295404680  3295404680   
29    Sun Mar 09 21:23:59 +0000 2014  2397833950  2397833950   
...                              ...         ...         ...   
1256  Mon Apr 05 03:09:54 +0000 2010   129683990   129683990   
1257  Wed Apr 15 15:55:37 +0000 2009    31439632    31439632   
1258  Mon Jun 02 16:11:52 +0000 2014  2541779029  2541779029   
1259  Mon Oct 06 06:30:52 +0000 2014  2809655590  2809655590   
1260  Sun Jun 30 04:23:09 +0000 2013  1557207091  1557207091   
1261  Mon Jun 11 13:51:25 +0000 2012   605517957   605517957   
1262  Thu Sep 15 05:25:59 +0000 2011   373783051   373783051   
1263  Wed May 06 01:37:30 +0000 2015  3237600915  3237600915   
1264  Thu Jul 21 02:56:41 +0000 2011   339430631   339430631   
1265  Sat Aug 22 02:28:23 +0000 2009    67790595    67790595   
1266  Wed May 13 03:57:17 +0000 2015  3193836186  3193836186   
1267  Sat Apr 27 02:32:15 +0000 2013  1383376687  1383376687   
1268  Thu Jun 18 18:25:24 +0000 2009    48439918    48439918   
1269  Mon Jan 11 22:07:50 +0000 2016  4778173235  4778173235   
1270  Fri Aug 17 22:20:09 +0000 2012   764552694   764552694   
1271  Tue Jan 15 01:54:44 +0000 2013  1090750657  1090750657   
1272  Mon Aug 06 04:08:39 +0000 2012   739815102   739815102   
1273  Wed Jan 08 10:31:19 +0000 2014  2281953847  2281953847   
1274  Sun Sep 20 09:19:35 +0000 2015  3625402333  3625402333   
1275  Fri Sep 19 21:50:28 +0000 2014  2820491088  2820491088   
1276  Sun Jan 30 10:10:04 +0000 2011   244876374   244876374   
1277  Mon Aug 27 20:56:44 +0000 2012   785460690   785460690   
1278  Sat Sep 27 14:11:48 +0000 2008    16485764    16485764   
1279  Mon May 14 12:59:01 +0000 2012   579858097   579858097   
1280  Tue Jan 22 20:02:08 +0000 2013  1112486646  1112486646   
1281  Sat Jul 18 20:15:17 +0000 2009    58021061    58021061   
1282  Wed Jul 25 06:03:42 +0000 2012   715571516   715571516   
1283  Fri Mar 16 12:30:29 +0000 2012   526346146   526346146   
1284  Mon Mar 12 03:44:53 +0000 2007      980611      980611   
1285  Tue Nov 06 15:37:42 +0000 2012   929963226   929963226   

                  username      screen_name                       location  \
0               Ali Ahmadi      AliR_Ahmadi                     Tehran-NYC   
1                  real me         73101mtp                           None   
2           Linda Osheroff           Oshcoy                           None   
3       Drudge Report Feed   drudgereported                           None   
4             march23ho☺️✨        _BEaUbree  @ the doe waiting on March☺️✨   
5               irrelevant       _shutupjai                           None   
6                   Connie         cabootee                     North East   
7          Preston Grisham   prestongrisham                 Washington, DC   
8            Susan Thomasа     ebejekifabox                           None   
9     Biljana B Milenkovic          bibacus                  Washington DC   
10            Victor Acuna     VictorAcuna8                           None   
11             tulio gómez        tulio1987             GUÁRICO VENEZUELA.   
12     The Puritan Baptist   PuritanBaptist             Trafalgar, Indiana   
13          Robert holgate       415holgate                           None   
14                 maureen     grandmapurse                        Seattle   
15              CoolHand7a       coolhand7a              Land of the Free!   
16                     ctl           jetx86                           None   
17       Rickster Rickster     Rickstersays                           None   
18          ReFounderParty   ReFounderParty                            USA   
19           Carole Curtis      2013_carole                           None   
20                       .      itstrainell                   Gonzales, LA   
21                    venr           Venr95          issaquah , Washington   
22                   Donna   rustythimble54                           None   
23            ㅤㅤㅤㅤㅤㅤㅤㅤㅤㅤㅤㅤ          1umanta                           None   
24     CrAiG LaRkIn KB2FED           kb2fed                 Rome NY FN23gf   
25                  Jeff V       Jeffsright          Iowa-Fly over country   
26               Jon Stall        jon4stall                           None   
27                    Jane          Jane_WI                 Fitzwalkerstan   
28               Jim Ertel        ertel_jim                Canandaigua, NY   
29                  Andres   canadillandres                           None   
...                    ...              ...                            ...   
1256           juana uribe      juanauribep                         Bogota   
1257         angie  phelps        skyoversc                           None   
1258        Kansas Liberal        KsLiberal                           None   
1259          Il Boccalone  avolteabboccano                           None   
1260    Ecuador en Directo   EcuadorDirecto             Guayaquil, Ecuador   
1261         Doug Dennison    DougDennison1                           None   
1262        AdolfJoeBiden™  Bidenshairplugs                            USA   
1263           Annie's Mom     AppleAnnie_2                     Texas, USA   
1264         Hanna Bagheri        HannaBags           Univ. of North Texas   
1265          Neil Stevens        presjpolk              Northern Virginia   
1266        Clark Williams        Knight276                  United States   
1267               Xristos       Xristos585                           None   
1268             Capitol24  randomlyCapitol                London, England   
1269         إيهم الغامدي،     ejxnukgfxavk                           None   
1270         Jared Johnson       jaredmj106               Jacksonville, NC   
1271           Gary McGraw       cigitalgem                        'merica   
1272                 Leigh  brorichysistrly                           None   
1273                CDHVDS           CDHVDS                      Nederland   
1274       Serge Poznanski  poznanski_serge                           None   
1275      Terris Anne Ladd          tladd58                   Mayfield, Ky   
1276    Blog de Juan Pardo    JuanPardoZurg               Almería (España)   
1277           Delendarius      Delendarius                           None   
1278                Wavpin           WavPin                      The World   
1279        Justine unSafe    FL_narcissist                        Florida   
1280            Lisa Kirby     LisaKBromley                           None   
1281           Janet Scott      JanetSScott                Kansas City, MO   
1282  BOYWONDERELROYJETSON    thesaddleguy1                 Pittsburgh, PA   
1283        Terri and Tony          TCuccio                           None   
1284                Karoli           Karoli                     West Coast   
1285       dawn goodfallow     dawngpsalm63                           None   

      followers_count  friends_count user_lang following   geo coordinates  \
0                 878            739        en      None  None        None   
1                  81             55        en      None  None        None   
2                 199            338        en      None  None        None   
3                  26             43        en      None  None        None   
4                 588            296        en      None  None        None   
5                 894            676        en      None  None        None   
6                 100             86        en      None  None        None   
7                1668           1595        en      None  None        None   
8                   0             15        en      None  None        None   
9                 414            612        en      None  None        None   
10                224            156        en      None  None        None   
11               1237           2998        es      None  None        None   
12                 59             40        en      None  None        None   
13               1247           2097        en      None  None        None   
14                277            158        en      None  None        None   
15                172             29        en      None  None        None   
16               4438           4343        en      None  None        None   
17                865            786        en      None  None        None   
18               2852           3120        en      None  None        None   
19                133             95        en      None  None        None   
20                512            338        en      None  None        None   
21                139             30        ar      None  None        None   
22               4399           4312        en      None  None        None   
23               7273           5472        en      None  None        None   
24               1566           3088        en      None  None        None   
25               3874           4133        en      None  None        None   
26                 35            168        en      None  None        None   
27               3960           3804        en      None  None        None   
28                  6             34        en      None  None        None   
29                 32            151        es      None  None        None   
...               ...            ...       ...       ...   ...         ...   
1256            29426            777        es      None  None        None   
1257              325           1177        en      None  None        None   
1258              205             90        en      None  None        None   
1259               11              4        it      None  None        None   
1260              460            189        es      None  None        None   
1261               71             87        en      None  None        None   
1262            31212          13293        en      None  None        None   
1263              189            131        en      None  None        None   
1264              519            415        en      None  None        None   
1265             2292            446        en      None  None        None   
1266             1191            601        en      None  None        None   
1267               64            394        en      None  None        None   
1268             7473           7188        en      None  None        None   
1269                1            129        en      None  None        None   
1270             5289           5708        en      None  None        None   
1271             2236             19        en      None  None        None   
1272              109            138        en      None  None        None   
1273              251            710        nl      None  None        None   
1274              434           3558        fr      None  None        None   
1275             1980           2287        en      None  None        None   
1276             3308           3849        es      None  None        None   
1277              183            142        en      None  None        None   
1278             1815           1978        en      None  None        None   
1279             1129           1860        en      None  None        None   
1280             1648           2172        en      None  None        None   
1281               64            266        en      None  None        None   
1282              950           5001        en      None  None        None   
1283              206            183        en      None  None        None   
1284            16948           3699        en      None  None        None   
1285             1076           1873        en      None  None        None   

      retweet_count  favorite_count lang country  
0                 0               0   en    None  
1                 0               0   en    None  
2                 0               0   en    None  
3                 0               0   en    None  
4                 0               0   en    None  
5                 0               0  und    None  
6                 0               0   en    None  
7                 0               0   en    None  
8                 0               0   en    None  
9                 0               0   en    None  
10                0               0   en    None  
11                0               0   es    None  
12                0               0  und    None  
13                0               0   en    None  
14                0               0   en    None  
15                0               0   en    None  
16                0               0   en    None  
17                0               0   en    None  
18                0               0   en    None  
19                0               0   en    None  
20                0               0  und    None  
21                0               0   en    None  
22                0               0   en    None  
23                0               0   en    None  
24                0               0   en    None  
25                0               0   en    None  
26                0               0   en    None  
27                0               0   en    None  
28                0               0   en    None  
29                0               0   es    None  
...             ...             ...  ...     ...  
1256              0               0   es    None  
1257              0               0   en    None  
1258              0               0   en    None  
1259              0               0   it    None  
1260              0               0   es    None  
1261              0               0   en    None  
1262              0               0   en    None  
1263              0               0   en    None  
1264              0               0   en    None  
1265              0               0   en    None  
1266              0               0   en    None  
1267              0               0   en    None  
1268              0               0   en    None  
1269              0               0   en    None  
1270              0               0   en    None  
1271              0               0   en    None  
1272              0               0   en    None  
1273              0               0   en    None  
1274              0               0   en    None  
1275              0               0   en    None  
1276              0               0   es  España  
1277              0               0   en    None  
1278              0               0   en    None  
1279              0               0   en    None  
1280              0               0   en    None  
1281              0               0   en    None  
1282              0               0   en    None  
1283              0               0   en    None  
1284              0               0   en    None  
1285              0               0   en    None  

[1286 rows x 17 columns]
In [7]:
tweets_by_loc = tweets['location'].value_counts()
print len(tweets_by_loc)
597
In [8]:
tweets_by_loc = tweets['location'].value_counts()                # determines most frequent tweet Locations
print tweets_by_loc          # prints out Locations and how many tweets came from each Location - in descending order
USA                               22
United States                     10
Florida                            7
New Jersey, USA                    7
North Carolina                     6
Los Angeles                        6
Texas, USA                         6
The World                          6
Houston, TX                        5
Atlanta, GA                        5
New York, NY                       5
Washington DC                      5
New York                           5
California, USA                    5
London                             4
Washington, DC                     4
Florida, USA                       4
Brooklyn, NY                       4
Texas                              4
World Wide Web                     3
  ST. LOUIS                        3
Va. Beach,Va.                      3
Republic of Texas                  3
Illinois                           3
New London 1775                    3
U.S.A.                             3
O-H-I-O                            3
Pullman, WA                        3
ENGLAND                            3
Canada                             3
                                  ..
ny                                 1
Alexandria, VA                     1
Danmark og verden                  1
The Mid-West                       1
Arizona, USA                       1
Clarksville,Tn                     1
Alabama Georgia Line               1
Ithilien                           1
The Hills of Los Angeles           1
Jalapa                             1
Hamburg                            1
Germany                            1
texas                              1
Mansfield, TX                      1
#FeelTheBern SF Bay Area           1
I am not affiliated w/pk in CO     1
São Paulo, Brasil                  1
Miami                              1
Comarca Bolsón                     1
Defiance,PA                        1
USA                                1
Fermo                              1
#BDS is a moral imperative         1
new york                           1
New Zealand                        1
Hampton Roads, Va                  1
Trafalgar, Indiana                 1
London, England                    1
Marietta, Georgia                  1
issaquah , Washington              1
Name: location, dtype: int64
In [9]:
tweets_by_lang = tweets['user_lang'].value_counts()                # determines most frequent tweet countries
print tweets_by_lang                                               # prints Tweet Freq. in each used Language 
en       1107
es        107
fr         15
en-gb      10
it          7
de          6
ja          5
ru          4
ar          3
pt          3
nl          3
tr          2
sv          2
da          2
th          2
en-GB       2
uk          1
es-MX       1
pl          1
fi          1
he          1
el          1
Name: user_lang, dtype: int64
In [11]:
tweets_by_lang = tweets['user_lang'].value_counts()

fig, ax = plt.subplots()
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=10)
ax.set_xlabel('User_lang', fontsize=15)
ax.set_ylabel('Number of tweets' , fontsize=15)
ax.set_title('Top 5 user_lang', fontsize=25, fontweight='bold',color='black')
tweets_by_lang[:5].plot(ax=ax, kind='bar', color='black')
#
# Top Five user Languages are - English - Espanol , French , En-Gb ? and Presumably Italian . 
# 
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0xfb52438>
In [12]:
tweets_by_loc = tweets['location'].value_counts()

fig, ax = plt.subplots()
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=10)
ax.set_xlabel('Locations', fontsize=15)
ax.set_ylabel('Number of tweets' , fontsize=15)
ax.set_title('Top 25 Locations', fontsize=25, fontweight='bold',color='black')
tweets_by_loc[:25].plot(ax=ax, kind='bar', color='black')
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0xf8ce208>
In [13]:
tweets_by_followers_count = tweets['followers_count'].value_counts() 

# Need to re- check seems something wrong here ...

fig, ax = plt.subplots()
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=10)
ax.set_xlabel('followers_count', fontsize=15)
ax.set_ylabel(' Count' , fontsize=15)
ax.set_title('Top 15 followers_count', fontsize=25, fontweight='bold',color='black')
tweets_by_followers_count[:15].plot(ax=ax, kind='bar', color='black')
Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x1040be80>
In [14]:
import re
In [16]:
def word_in_text(word, text):
    word = word.lower()
    text = text.lower()
    match = re.search(word, text)
    if match:
        return True
    return False

def extract_link(text):
    regex = r'https?://[^\s<>"]+|www\.[^\s<>"]+'
    match = re.search(regex, text)
    if match:
        return match.group()
    return ''

if __name__ == '__main__':
    
    tweets_data_path = 'C:/..../Twitter1/_US_Pres.txt'

    tweets_data = []
    tweets_file = open(tweets_data_path, "r")
    for line in tweets_file:
        try:
            tweet = json.loads(line)
            tweets_data.append(tweet)
        except:
            continue

    tweets = pd.DataFrame()

    texts = []
    langs = []
    countries = []
    for line, tweet in enumerate(tweets_data):
        #print line, tweet
        try:
            texts.append(tweet['text'])
#            langs.append(tweet['lang'])
#            countries.append(tweet['place']['country'] if tweet['place'] != None else None)
        except:
            print "Error line %d" % (line)
            

    tweets['text'] = texts
#    tweets['lang'] = langs
#    tweets['country'] = countries

    #Mining
    tweets['python'] = tweets['text'].apply(lambda tweet: word_in_text('python', tweet))
    tweets['javascript'] = tweets['text'].apply(lambda tweet: word_in_text('javascript', tweet))
    tweets['ruby'] = tweets['text'].apply(lambda tweet: word_in_text('ruby', tweet))

    tweets['programming'] = tweets['text'].apply(lambda tweet: word_in_text('programming', tweet))
    tweets['tutorial'] = tweets['text'].apply(lambda tweet: word_in_text('tutorial', tweet))

    tweets['relevant'] = tweets['text'].apply(lambda tweet: word_in_text('programming', tweet) or word_in_text('tutorial', tweet))

    tweets['link'] = tweets['text'].apply(lambda tweet: extract_link(tweet))

    tweets_relevant = tweets[tweets['relevant'] == True]
    tweets_relevant_with_link = tweets_relevant[tweets_relevant['link'] != '']

    print tweets_relevant_with_link[tweets_relevant_with_link['python'] == True]['link']
    print tweets_relevant_with_link[tweets_relevant_with_link['javascript'] == True]['link']
    print tweets_relevant_with_link[tweets_relevant_with_link['ruby'] == True]['link']
    plt.show()
Series([], Name: link, dtype: object)
Series([], Name: link, dtype: object)
Series([], Name: link, dtype: object)
In [17]:
import tweepy

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
ids = []
current_cursor = ""
for page in tweepy.Cursor(api.friends_ids, screen_name="DhankarRohit").pages():

 cursor = tweepy.Cursor(api.followers_ids, screen_name="DhankarRohit",  
 cursor =  current_cursor)
current_cursor = cursor.iterator.next_cursor
# print repr(cursor)
# int current_cursor
ids.extend(page)
# print page 
# print ids

print len(ids)
1686
In [ ]:
 

No comments:

Post a Comment