# LinkedIn Profile - https://www.linkedin.com/in/rohitdhankar
#
#
# All of this code and any other code / refrences on this blog are shared freely under a GPL - https://www.gnu.org/licenses/gpl-3.0.en.html
# Inspiration -- ORielly Media Book - SciPy and NumPy - here in after refered to as "BOOK" . 
# Code has been corrected and further optimized where broken . 
# Comments are from Book and other sources - my own comments follow my name - DHANKAR. 
# Un-Comment the Print statements to Print out the OutPut in Console / iPython Notebooks 


import numpy as np 

# Create an array with 10^7 elements.
arr = np.arange(1e7)

# Converting ndarray to list
larr = arr.tolist()

# Lists cannot by default broadcast,
# so a function is coded to emulate
# what an ndarray can do.


def list_times(alist, scalar):
    for i, val in enumerate(alist):
        alist[i] = val * scalar
    return alist

# Using IPython's

timeit arr *1.1

timeit list_times(larr, 1.1)

# errors ------------------- TBD

  File "<ipython-input-1-b267ddb4822d>", line 21
    timeit arr *1.1
             ^
SyntaxError: invalid syntax

In [3]:

import numpy as np 

# First we create a list and then
# wrap it with the np.array() function.
alist = [1, 2, 3]
arr = np.array(alist)

print arr 
print "__________"
# Creating an array of zeros with five elements
arr1 = np.zeros(5)

print arr1  # OK 
print "__________"


# What if we want to create an array going from 0 to 100?
arr2 = np.arange(100) #DHANKAR ---  ARANGE ----> ARRAY RANGE 

#print arr2 # OK

[1 2 3]
__________
[ 0.  0.  0.  0.  0.]
__________

In [4]:

arr3 = np.arange(70,100) #DHANKAR ---  ARANGE ----> ARRAY RANGE 70 to 100 

#print arr3 # OK 

# If you want 100 steps from 0 to 1...
arr4 = np.linspace(0, 1, 100)

#print arr4 # OK

In [5]:

# Or if you want to generate an array from 1 to 10
# in log10 space in 10 steps...

arr5 = np.logspace(0, 1, 10, base=10.0) # DHANKAR -- BASE value can be 2,3,4,5 etc .

#print arr5 # OK 

# Creating a 5x5 array of zeros (an image)
image = np.zeros((5,5))
image1 = np.ones((5,5))

#print image1 # OK

In [6]:

# Array of FIVE zeros - Which are INTEGERS 
arr6 = np.zeros(5, dtype=int)

print arr6

# Array of FIVE zero - which are FLOATS
arr7 = np.zeros(5, dtype=np.float32)
arr17 = np.arange(1,5, dtype=np.float32) # Gives 5 Floats32 between Range 1 to 5 
arr27 = np.arange(1,6, dtype=np.float64) # Gives 6 Floats64 between Range 1 to 6 # Cant see Diff between 32 and 64 

print arr27


# Creating a 5x5x5 cube of 1's
# The astype() method sets the array with integer elements.
#cube_int = np.zeros((5,5,5)).astype(int) +1 # If we dont do this +1 then all Elements of ARRAY are ZERO Values 
#cube_int = np.zeros((5,5,5)).astype(int) +3 # If we dont do this +3 then all Elements of ARRAY are ZERO Values 

# DHANKAR ----  This below is NOT a Cube but an 2X2 Array --- with 5 Rows and 5 Columns 
#
cube_int = np.zeros((5,5)).astype(int) +1 # If we dont do this +1 then all Elements of ARRAY are ZERO Values 


# Or even simpler with 16-bit floating-point precision...
cube_float = np.ones((5, 5, 5)).astype(np.float16)

print"_____________"

#print cube_int

print"_____________"

#print cube_float

[0 0 0 0 0]
[ 1.  2.  3.  4.  5.]
_____________
_____________

In [6]:

# Creating an array with elements from 0 to 999
arr1d = np.arange(1000)

#print arr1d #--- OK 


# Now reshaping the array to a 10x10x10 3D array
arr3d = arr1d.reshape((10,10,10))

#print arr3d 

##--- OK 

#OK --- Doesnt Print if the 2D array is of - Less than 1000 

# The reshape command can alternatively be called this way
arr3d1 = np.reshape(arr1d, (10, 10, 10))

#print arr3d1


'''
# Inversely, we can flatten arrays
arr4d = np.zeros((10, 10, 10, 10))
arr1d = arr4d.ravel()

print arr1d.shape
(1000,)

'''

# Not understood Above this

Out[6]:

'\n# Inversely, we can flatten arrays\narr4d = np.zeros((10, 10, 10, 10))\narr1d = arr4d.ravel()\n\nprint arr1d.shape\n(1000,)\n\n'

In [7]:

# Creating an array of zeros and defining column types

recarr = np.zeros((2,), dtype=('i4,f4,a10'))
toadd = [(1,2.,'Hello'),(2,3.,"World")]
recarr[:] = toadd

'''
The dtype optional argument is defining the types designated for the first to third
columns, where i4 corresponds to a 32-bit integer, f4 corresponds to a 32-bit float,
and a10 corresponds to a string 10 characters long. Details on how to define more
types can be found in the NumPy documentation.1 This example illustrates what the
recarray looks like, but it is hard to see how we could populate such an array easily.
Thankfully, in Python there is a global function called zip that will create a -------- "list of tuples"
like we see above for the ---- "toadd object". So we show how to use zip to populate the same
recarray.
'''

print recarr

[(1, 2.0, 'Hello') (2, 3.0, 'World')]

In [12]:

# Creating an array of zeros and defining column types
recarr1 = np.zeros((3,), dtype=('i4,f4,a10'))
# Now creating the columns we want to put
# in the recarray

col1 = np.arange(3) + 11                   # own experi -------------ok 
col2 = np.arange(3, dtype=np.float32)
col3 = ['Hello', 'World','World1']

# Here we create a list of tuples that is
# identical to the previous toadd list.
toadd = zip(col1, col2, col3)

# Assigning values to recarr
recarr1[:] = toadd

print recarr1

# Assigning names to each column, which
# are now by default called 'f0', 'f1', and 'f2'.

recarr1.dtype.names = ('Integers' , 'Floats', 'Strings')

print "------------------"
print recarr1

# If we want to access one of the columns by its name, we
# can do the following.
print recarr1('Integers') # GIVES an Error

# array([1, 2], dtype=int32)

[(11, 0.0, 'Hello') (12, 1.0, 'World') (13, 2.0, 'World1')]
------------------
[(11, 0.0, 'Hello') (12, 1.0, 'World') (13, 2.0, 'World1')]

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-12-412edbe9bd24> in <module>()
     27 # If we want to access one of the columns by its name, we
     28 # can do the following.
---> 29 print recarr1('Integers')
     30 
     31 # array([1, 2], dtype=int32)

TypeError: 'numpy.ndarray' object is not callable

In [19]:

#Indexing and Slicing
#Python index lists begin at zero and theNumPy arrays follow suit. When indexing lists
#in Python, we normally do the following for a 2 × 2 object:


alist_1=[[1.1,2],[3,4]]

# To return the (0,1) element we must index as shown below.

print alist_1[0][0]

print alist_1[0][1]

print alist_1[1][0]

print alist_1[1][1]

In [26]:

# Converting the list defined above into an array

arr_1 = np.array(alist_1)

# To return the (0,1) element we use ...
arr_1[0,1]

# Now to access the Columns ...
print arr_1[:,0] # FIRST COLUMN 
print arr_1[:,1] # IInd COLUMN 

print arr_1[0,:] # FIRST ROW
print arr_1[1,:] # IInd ROW

[ 1.1  3. ]
[ 2.  4.]
[ 1.1  2. ]
[ 3.  4.]

In [30]:

# Creating an array
arr_2 = np.arange(5)

print arr_2

print "_______________"
# Creating the index array
index = np.where(arr_2 > 2)

print(index)
#(array([3, 4]),)

# Creating the desired array
new_arr = arr_2[index]

print "_______________"
print new_arr

[0 1 2 3 4]
_______________
(array([3, 4], dtype=int64),)
_______________
[3 4]

In [58]:

# Creating an image
img1 = np.zeros((10, 10)) + 1 # Here the + Number INT Gives the Values to be Populated in Array Elements Initially 
img1[2:-4, 2:-2] = 4.44  # This overides the values 
img1[4:-4, 4:-4] = 8.88

# img11 = np.zeros((10, 10,dtype=int)) # Invalid Syntax --- dtype=int Not Taken as Input 
#print img11

print img1

print"_______________________________"
print"_______________________________"

# Creating another image
img2 = np.zeros((10, 10)) + 1 # Here the + Number INT Gives the Values to be Populated in Array Elements Initially 
img2[2:-2, 2:-2] = 4.44  # This overides the values 
img2[4:-4, 4:-4] = 8.88

print img2
# See Plot A



# Let's filter out all values larger than 2 and less than 6.
index1 = img1 > 1
index2 = img1 < 7
compound_index = index1 & index2

print"_______________________________"
print"_______________________________"

print compound_index

# The compound statement can alternatively be written as
compound_index1 = (img1 > 3) & (img1 < 7)
img2 = np.copy(img1)
img2[compound_index1] = 0
# See Plot B.

print"_______________________________"
print"_______________________________"

print compound_index1

[[ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]
 [ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]
 [ 1.    1.    4.44  4.44  4.44  4.44  4.44  4.44  1.    1.  ]
 [ 1.    1.    4.44  4.44  4.44  4.44  4.44  4.44  1.    1.  ]
 [ 1.    1.    4.44  4.44  8.88  8.88  4.44  4.44  1.    1.  ]
 [ 1.    1.    4.44  4.44  8.88  8.88  4.44  4.44  1.    1.  ]
 [ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]
 [ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]
 [ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]
 [ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]]
_______________________________
_______________________________
[[ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]
 [ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]
 [ 1.    1.    4.44  4.44  4.44  4.44  4.44  4.44  1.    1.  ]
 [ 1.    1.    4.44  4.44  4.44  4.44  4.44  4.44  1.    1.  ]
 [ 1.    1.    4.44  4.44  8.88  8.88  4.44  4.44  1.    1.  ]
 [ 1.    1.    4.44  4.44  8.88  8.88  4.44  4.44  1.    1.  ]
 [ 1.    1.    4.44  4.44  4.44  4.44  4.44  4.44  1.    1.  ]
 [ 1.    1.    4.44  4.44  4.44  4.44  4.44  4.44  1.    1.  ]
 [ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]
 [ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]]
_______________________________
_______________________________
[[False False False False False False False False False False]
 [False False False False False False False False False False]
 [False False  True  True  True  True  True  True False False]
 [False False  True  True  True  True  True  True False False]
 [False False  True  True False False  True  True False False]
 [False False  True  True False False  True  True False False]
 [False False False False False False False False False False]
 [False False False False False False False False False False]
 [False False False False False False False False False False]
 [False False False False False False False False False False]]
_______________________________
_______________________________
[[False False False False False False False False False False]
 [False False False False False False False False False False]
 [False False  True  True  True  True  True  True False False]
 [False False  True  True  True  True  True  True False False]
 [False False  True  True False False  True  True False False]
 [False False  True  True False False  True  True False False]
 [False False False False False False False False False False]
 [False False False False False False False False False False]
 [False False False False False False False False False False]
 [False False False False False False False False False False]]

In [60]:

# Making the boolean arrays even more complex
index3 = img1 == 9
index4 = (index1 & index2) | index3
img3 = np.copy(img1)
img3[index4] = 0

print img3
# See Plot C.

[[ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]
 [ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]
 [ 1.    1.    0.    0.    0.    0.    0.    0.    1.    1.  ]
 [ 1.    1.    0.    0.    0.    0.    0.    0.    1.    1.  ]
 [ 1.    1.    0.    0.    8.88  8.88  0.    0.    1.    1.  ]
 [ 1.    1.    0.    0.    8.88  8.88  0.    0.    1.    1.  ]
 [ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]
 [ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]
 [ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]
 [ 1.    1.    1.    1.    1.    1.    1.    1.    1.    1.  ]]

In [66]:

import numpy as np
import numpy.random as rand

# Creating a 100-element array with random values
# from a standard normal distribution or, in other
# words, a Gaussian distribution.

# The sigma is 1 and the mean is 0.
a = rand.randn(10)



print a 

# Here we generate an index for filtering
# out undesired elements.
index = a > 0.2
b = a[index]

print"_______________________________"
print"_______________________________"

print b 


# NOT UNDERSTOOD BELOW THIS ------------------------------------------- TBD 
#
#
#
#

# We execute some operation on the desired elements.
#b = b ** 2 - 2
#b = b ** 20000 # ERROR --- C:\Anaconda2\lib\site-packages\ipykernel\__main__.py:34: RuntimeWarning: overflow encountered in power

b = b ** 2 # Exponent Multiplication

# Then we put the modified elements back into the
# original array.
a[index] = b

print"_______________________________"
print"_______________________________"


print a

[ 0.40826517  0.10412734 -0.30009569  0.21760139  0.15201446  1.75867338
  0.3127245   1.92204658 -0.06786431 -0.52417958]
_______________________________
_______________________________
[ 0.40826517  0.21760139  1.75867338  0.3127245   1.92204658]
_______________________________
_______________________________
[ 0.16668045  0.10412734 -0.30009569  0.04735037  0.15201446  3.09293205
  0.09779661  3.69426304 -0.06786431 -0.52417958]

In [20]:

#text PArsing Starts --- Book Code gives ERROR --- Next Cell is OK 

import numpy as np 

table1 = np.loadtxt("1.txt"),dtype={'names':('IiD', 'Result', 'hType'),'formats':('S4', 'f4', 'i2')}

print table

  File "<ipython-input-20-55e777f91e53>", line 5
    table1 = np.loadtxt("1.txt"),dtype={'names':('IiD', 'Result', 'hType'),'formats':('S4', 'f4', 'i2')}
SyntaxError: can't assign to function call

In [22]:

#Stack Overflow Code --- Validate again OK 

import numpy as np 

table1 = np.genfromtxt('1.txt',dtype=None,names=('ID', 'Result', 'Type'))

print table1

[('xr21', 32.897, 1) ('xr11', 32.887, 2) ('xr31', 32.888, 3)]

In [24]:

#Stack Overflow Code --- Much better -- Gives all Details for the ARRAY 

import numpy as np 

np.genfromtxt('1.txt',dtype=None,names=('ID', 'Result', 'Type'))

# Our Text INput File === array([('xr21', 32.897, 1), ('xr11', 32.887, 2), ('xr31', 32.888, 3)],
# we are getting  - data types as === dtype=[('ID', 'S4'), ('Result', '<f8'), ('Type', '<i4')])
#in book the data types are given as === dtype=[('ID', '|S4'), ('Result', '<f4'), ('Type', '<i2')])


# Next Try 
# Our Text INput File === xr21 32.897 1999
# We are Still Getting ====  dtype=[('ID', 'S4'), ('Result', '<f8'), ('Type', '<i4')])
#

# Using -- matplotlib.mlab3 --- is recommended in Book 
# # Using -- Asciitable.   --- is recommended in Book

Out[24]:

array([('xr21', 32.897, 1999), ('xr11', 32.887, 2999),
       ('xr31', 32.888, 3999)], 
      dtype=[('ID', 'S4'), ('Result', '<f8'), ('Type', '<i4')])

In [37]:

import numpy as np

# Creating a large array
#data = np.arange(0, 3)              # Own Experi 

data = np.empty((10, 13))            # Book Example 

# Saving the array with numpy.save
np.save('test.npy', data)

# If space is an issue for large files, then
# use numpy.savez instead. It is slower than
# numpy.save because it compresses the binary
# file.
np.savez('test.npz', data)

# Loading the data array
newdata = np.load('test.npy')
#

print newdata

[[  5.12346075e-321   2.50032355e-315   5.05923221e-321   2.12199579e-314
    4.41942686e-062   5.20171780e-090   3.64460956e-086   5.64054601e-062
    6.01358989e-154   1.83087169e-076   1.28254324e-153   4.01458914e-057
    1.79339649e-052]
 [  4.44891543e-086   6.37552179e-067   6.01358998e-154   1.90099392e-052
    1.28254210e-153   4.42323271e-062   2.73650586e-057   1.74281656e-076
    5.20078136e-090   6.01346953e-154   7.27135828e-043   1.58058325e-153
    4.91415357e-062]
 [  2.89581503e-057   3.37424802e-057   5.20077765e-090   8.09234687e-153
    5.15535220e-062   7.12019405e-067   8.23585822e-067   8.31859158e-072
    2.40492330e-057   3.11530744e+179   1.95994493e-062   4.85485104e-033
    2.44223814e-154]
 [  2.58390824e-057   5.45166369e-067   8.54621477e-072   3.92259138e+179
    4.56335231e-072   6.22667632e-038   6.03690292e-154   1.72559454e-047
    1.25602829e-071   2.40741251e-057   3.11087039e+179   2.99063856e-067
    3.94369185e-062]
 [  6.04856224e-154   3.06060578e-057   9.34281992e-067   3.44353379e-086
    5.93623305e-038   6.01357291e-154   1.79658424e-052   1.80549770e-153
    2.21257395e-052   2.73650582e-057   9.15786331e-072   5.20173275e-090
    6.01346953e-154]
 [  1.17794193e-047   1.72945759e-153   8.60780548e-043   6.37142414e-067
    1.82918352e-076   5.20747800e-090   3.84568602e-086   1.03997571e-042
    5.98153109e-154   1.90097951e-052   3.93249005e-062   4.66250598e-062
    5.20554544e-090]
 [  3.44353309e-086   1.25758436e-071   2.45790200e-154   9.34282456e-067
    1.26931766e-076   5.60374513e-067   2.95067449e+179   1.95994492e-062
    1.72425397e-047   6.04999050e-154   5.06151335e-038   3.37671844e-057
    4.43974430e-038]
 [  3.43948339e+179   3.61546024e-043   1.04102601e-042   6.04419210e-154
    4.47117260e-033   1.19920921e-071   2.74383974e-057   5.45167216e-067
    1.55282822e-033   2.21379587e-052   6.03837101e-154   1.72585449e-047
    5.63673739e-062]
 [  7.40329607e-038   8.31856565e-072   6.09941526e-154   2.10938465e-052
    1.65749853e-153   4.27557911e-033   9.72202187e-072   1.65645488e-076
    5.20268400e-090   4.44891543e-086   5.39936610e-062   5.98151395e-154
    3.70078589e-033]
 [  6.00050346e-067   6.22437552e-038   5.20171394e-090   3.44353309e-086
    6.52516396e-038   6.01357869e-154   3.31560588e-033   1.39833684e-259
    1.02897886e-071   5.20461668e-090   3.84568602e-086   3.37611514e-057
    6.01357858e-154]]

In [ ]:

# Book Text Below ------------

'''
2.4.1 Linear Algebra
NumPy arrays do not behave like matrices in linear algebra by default. Instead, the
operations are mapped from each element in one array onto the next. This is quite
a useful feature, as loop operations can be done away with for efficiency. But what
about when transposing or a dot multiplication are needed? Without invoking other
classes, you can use the built-in numpy.dot and numpy.transpose to do such operations.
The syntax is Pythonic, so it is intuitive to program. Or the math purist can use
the numpy.matrix object instead. We will go over both examples below to illustrate
the differences and similarities between the two options. More importantly, we will
compare some of the advantages and disadvantages between the numpy.array and the
numpy.matrix objects.
'''

#

In [10]:

import numpy as np 
import scipy

#
#from scipy.optimize import curve_fit
#Errors ---- TBD 

#

Data Science with R and Python

Thursday 6 October 2016

Initial Experiments with SciPy and NumPy