In [1]:
# graphlab provides the SFrame and text_analytics APIs used in the cells below.
import graphlab
import os
# Show the current working directory: "train_1.tsv" is loaded below via a relative path.
os.getcwd()
Out[1]:
In [2]:
# Load "train_1.tsv" (path relative to the working directory) into an SFrame named df_1.
df_1=graphlab.SFrame("train_1.tsv")
In [3]:
def convert_func(x, threshold=3):
    """Map a numeric sentiment score to a binary label.

    Args:
        x: Sentiment score of a single row.
        threshold: Scores strictly greater than this value are labelled 1.
            Defaults to 3, the cut-off this notebook has always used.

    Returns:
        1 if ``x > threshold``, otherwise 0.
    """
    # Strict comparison: a score equal to the threshold maps to 0.
    return 1 if x > threshold else 0
In [4]:
# Preview rows 100-109 (slice end is exclusive) before any derived columns are added.
df_1[100:110]
Out[4]:
In [5]:
df_1[1000:1010]
# Within 1,000 rows of the data frame, the Sentence ID has moved from 3 to 36 and the Phrase ID from 110 to 1001.
Out[5]:
In [6]:
df_1['Target'] = df_1['Sentiment'].apply(convert_func)
#
# Creates a new column named 'Target', added at the end of the data frame.
# Its values are the 'Sentiment' values converted as defined in "convert_func".
#
# Re-display rows 100-109, which now include the 'Target' column.
df_1[100:110]
Out[6]:
In [8]:
df_1['word_count'] = graphlab.text_analytics.count_words(df_1['Phrase'])
# Adds a 'word_count' column computed from 'Phrase' by graphlab.text_analytics.count_words.
In [12]:
df_1['Phrase'][1:2]
# Below we get one row of data type string (str), printed from the 'Phrase' column of our data frame.
# Every word in it occurs once, except 'the', which occurs twice.
Out[12]:
In [9]:
df_1['word_count'][1:2]
# Below, one row of the dictionary column (dtype: dict) is printed within curly braces.
# The count for every word is 1L, except 'the', which is 2L
# (the trailing L is Python 2's notation for a long integer).
Out[9]:
In [13]:
df_1['Phrase'][2:4]
# Two rows are printed: row 1 has two words ('A series'),
# and row 2 has one word ('A').
Out[13]:
In [10]:
df_1['word_count'][2:4]
# Below, two dictionary rows are printed, each within its own curly braces.
# Square brackets surround the complete output returned for this column (dtype: dict).
Out[10]:
In [11]:
# Wider slice of the same column: rows 2-9 of 'word_count'.
df_1['word_count'][2:10]
Out[11]:
No comments:
Post a Comment