Rohit @Strategic Leadership India
Web Scraping with “RSelenium”
# R Code Starts here ...
# devtools::install_github("ropensci/RSelenium")
# Load the packages from the default library paths. A machine-specific
# `lib.loc` is not needed and stops the script from running on any other
# machine or R version.
library(RSelenium)
library(XML)

# Start a Selenium server (if one isn't already running) using the default
# server initialisation values.
# NOTE(review): startServer() is reported as defunct in newer RSelenium
# releases, with rsDriver() as the documented replacement - confirm against
# the installed RSelenium version.
startServer()

# Create the remote driver client and open a browser session
# (opens up a Firefox window).
remDr <- remoteDriver$new()
remDr$open(silent = TRUE)

# Load the mobile-phone listing page that the rest of the script scrapes.
remDr$navigate("http://www.shopclues.com/mobiles/mobile-phones.html")

# Product names: collect the text of every element matching ".name".
webElems <- remDr$findElements(using = "css selector", ".name")
CSS_Text_Headers <- unlist(lapply(webElems, function(x) x$getElementText()))
CSS_Text_Headers # Not recommended ... large output
[1] “Zen Ultraphone Powermax ( 4200 MAh, 5" IPS, Quadcore, 1GB….”
[2] “Zen X8i Shine + Speakers+ Aluma Wallet + Watch”
[3] “Nosama VIRAAT The Phone With Inbuilt Power Bank”
[4] “Nokia 105 (Nokia Pack Of 3)”
[5] “Zopo ZP951 Speed 7 With 3GB RAM + 16GB ROM”
[6] “ADCOM MOBILE X5 WITH VOICE CHANGER FEATURE - Black & Orange”
[7] “Gionee F103 - 3 GB RAM”
[8] “OnePlus One(64GB)- 1 Year Manufacturer Warranty”
[9] “Kenxinda K1( 1GB RAM, MTK 1.3GHz Quadcore) & Free Leather….”
[10] “NOSAMA CHARCOAL 5 QS1 WITH 13 MP ROTATING CAMERA”
[11] “Samsung Galaxy J7”
[12] “Panasonic P55 Novo 2GB 16GB Champ Gold”
[13] “Gionee Marathon M3”
[14] “SAMSUNG GALAXY J5”
[15] “Adcom X14 Chatty With Whatsapp/Facebook-black”
[16] “Rage Yo C Mobile Dual Sim With Bluetooth, MP3 Player Red &….”
[17] “REACH SENSE 402”
[18] “I-Smart IS 201i LITE ( WhatsApp And Advanced Auto Call Recording)”
[19] “I-SMART 111 LITE (with Gujrati , Bengali & Hindi Language)….”
[20] “I-Smart 301i ELITE ( With Whats App )”
[21] “Intex Aqua Power Plus (Champagne)(2GB RAM+ 16GB ROM, 4000mAh Battery)”
[22] “Lava IRIS Fuel F1 (4000mah Battery, 2gb, 5, 8/2mp Camera, 8gb) 1….”
[23] “Google Nexus 5x (32GB, Corbon )”
[24] “I-SMART IS 100L (1000 MAh Battery & Digital Camera, Bluetooth,….”
[25] “Zopo ZP951 Speed 7 With 3GB RAM + 16GB ROM - Black”
[26] “FORME K09 Red”
[27] “Josh Topaz Dual SIM Mobile Buy One Get One Free”
[28] “Gionee Elife S7”
[29] “Google Nexus 5X (16GB, Carbon)”
[30] “Kestrel KM 100 Multimedia Phone (Black)”
[31] “Vell Com V-07 Dual Sim GSM With Whatsapp Multimedia Camera Mobile”
[32] “MicroSoft Lumia 640 XL Dual Sim”
[33] “INTEX MOBILE AquaPlay”
[34] “HTC Desire 526G + (16 GB)”
[35] “Micromax Bolt A58 Red”
[36] “HTC Desire 626g Plus ( Blue )”
[37] “Samsung Galaxy Core 2”
[38] “Samsung Galaxy Core Prime”
[39] “Panasonic Eluga S (White)”
[40] “Vox Kick K4 Android Kitkat Smart Phone Black”
[41] “Celkon Campus Nova A352E (White)”
[42] “Panasonic Eluga A (White)”
[43] “XOLT Mobile Phone XS1”
[44] “Forme Summer S700 (Red)”
[45] “MTS CG 131 ( ZTE D286 ) CDMA + GSM MOBILE FOR TATA RELIANCE MTS….”
[46] “ZTE N799D BLADE EG EVDO+GSM CDMA ANDROID 4.1 ALL Reliance Tata….”
[2] “Zen X8i Shine + Speakers+ Aluma Wallet + Watch”
[3] “Nosama VIRAAT The Phone With Inbuilt Power Bank”
[4] “Nokia 105 (Nokia Pack Of 3)”
[5] “Zopo ZP951 Speed 7 With 3GB RAM + 16GB ROM”
[6] “ADCOM MOBILE X5 WITH VOICE CHANGER FEATURE - Black & Orange”
[7] “Gionee F103 - 3 GB RAM”
[8] “OnePlus One(64GB)- 1 Year Manufacturer Warranty”
[9] “Kenxinda K1( 1GB RAM, MTK 1.3GHz Quadcore) & Free Leather….”
[10] “NOSAMA CHARCOAL 5 QS1 WITH 13 MP ROTATING CAMERA”
[11] “Samsung Galaxy J7”
[12] “Panasonic P55 Novo 2GB 16GB Champ Gold”
[13] “Gionee Marathon M3”
[14] “SAMSUNG GALAXY J5”
[15] “Adcom X14 Chatty With Whatsapp/Facebook-black”
[16] “Rage Yo C Mobile Dual Sim With Bluetooth, MP3 Player Red &….”
[17] “REACH SENSE 402”
[18] “I-Smart IS 201i LITE ( WhatsApp And Advanced Auto Call Recording)”
[19] “I-SMART 111 LITE (with Gujrati , Bengali & Hindi Language)….”
[20] “I-Smart 301i ELITE ( With Whats App )”
[21] “Intex Aqua Power Plus (Champagne)(2GB RAM+ 16GB ROM, 4000mAh Battery)”
[22] “Lava IRIS Fuel F1 (4000mah Battery, 2gb, 5, 8/2mp Camera, 8gb) 1….”
[23] “Google Nexus 5x (32GB, Corbon )”
[24] “I-SMART IS 100L (1000 MAh Battery & Digital Camera, Bluetooth,….”
[25] “Zopo ZP951 Speed 7 With 3GB RAM + 16GB ROM - Black”
[26] “FORME K09 Red”
[27] “Josh Topaz Dual SIM Mobile Buy One Get One Free”
[28] “Gionee Elife S7”
[29] “Google Nexus 5X (16GB, Carbon)”
[30] “Kestrel KM 100 Multimedia Phone (Black)”
[31] “Vell Com V-07 Dual Sim GSM With Whatsapp Multimedia Camera Mobile”
[32] “MicroSoft Lumia 640 XL Dual Sim”
[33] “INTEX MOBILE AquaPlay”
[34] “HTC Desire 526G + (16 GB)”
[35] “Micromax Bolt A58 Red”
[36] “HTC Desire 626g Plus ( Blue )”
[37] “Samsung Galaxy Core 2”
[38] “Samsung Galaxy Core Prime”
[39] “Panasonic Eluga S (White)”
[40] “Vox Kick K4 Android Kitkat Smart Phone Black”
[41] “Celkon Campus Nova A352E (White)”
[42] “Panasonic Eluga A (White)”
[43] “XOLT Mobile Phone XS1”
[44] “Forme Summer S700 (Red)”
[45] “MTS CG 131 ( ZTE D286 ) CDMA + GSM MOBILE FOR TATA RELIANCE MTS….”
[46] “ZTE N799D BLADE EG EVDO+GSM CDMA ANDROID 4.1 ALL Reliance Tata….”
# Wrap the scraped product names in a one-column data frame, then preview
# its first and last rows.
df <- as.data.frame(CSS_Text_Headers)
head(df)
tail(df)
CSS_Text_Headers
1 Zen Ultraphone Powermax ( 4200 MAh, 5" IPS, Quadcore, 1GB…. 2 Zen X8i Shine + Speakers+ Aluma Wallet + Watch 3 Nosama VIRAAT The Phone With Inbuilt Power Bank 4 Nokia 105 (Nokia Pack Of 3) 5 Zopo ZP951 Speed 7 With 3GB RAM + 16GB ROM 6 ADCOM MOBILE X5 WITH VOICE CHANGER FEATURE - Black & Orange CSS_Text_Headers 41 Celkon Campus Nova A352E (White) 42 Panasonic Eluga A (White) 43 XOLT Mobile Phone XS1 44 Forme Summer S700 (Red) 45 MTS CG 131 ( ZTE D286 ) CDMA + GSM MOBILE FOR TATA RELIANCE MTS…. 46 ZTE N799D BLADE EG EVDO+GSM CDMA ANDROID 4.1 ALL Reliance Tata….
#
# Price blocks: collect the text of every element matching ".product-price",
# store it as a one-column data frame and preview both ends.
webElems1 <- remDr$findElements(using = "css selector", ".product-price")
CSS_Text_Headers1 <- unlist(
  lapply(webElems1, function(elem) elem$getElementText())
)
df1 <- as.data.frame(CSS_Text_Headers1)
head(df1)
tail(df1)
CSS_Text_Headers1
1 Rs. 4,799.5,799 Rs.7,699 2 Rs. 1,299.1,499 Rs.4,126 3 Rs. 1,899.2,499 4 Rs. 3,420.4,016 Rs.4,295 5 Rs. 12,999.14,499 Rs.14,600 6 Rs. 649.899 Rs.999 CSS_Text_Headers1 41 Rs. 2,099.2,499 Rs.2,810 42 Rs. 7,140.10,447 43 Rs. 772.1,349 44 Rs. 1,395.1,999 45 Rs. 2,299.2,550 46 Rs. 6,999.7,999
# ".old-price" can match more than one element per item, so this vector is
# longer than the list of product names.
# Open question: SelectorGadget reports the ".old-price" class, but it was
# not found when reading the page source - why?
webElems2 <- remDr$findElements(using = "css selector", ".old-price")
CSS_Text_Headers2 <- unlist(
  lapply(webElems2, function(elem) elem$getElementText())
)
df2 <- as.data.frame(CSS_Text_Headers2)
head(df2)
tail(df2)
CSS_Text_Headers2 1 Rs.5,799 2 Rs.7,699 3 Rs.1,499 4 Rs.4,126 5 Rs.2,499 6 Rs.4,016 CSS_Text_Headers2 65 Rs.2,810 66 Rs.10,447 67 Rs.1,349 68 Rs.1,999 69 Rs.2,550 70 Rs.7,999
#
# Current prices: collect the text of every element matching ".price",
# store it as a one-column data frame and preview both ends.
webElems3 <- remDr$findElements(using = "css selector", ".price")
CSS_Text_Headers3 <- unlist(
  lapply(webElems3, function(elem) elem$getElementText())
)
df3 <- as.data.frame(CSS_Text_Headers3)
head(df3)
tail(df3)
CSS_Text_Headers3 1
2 4,799 3 1,299 4 1,899 5 3,420 6 12,999 CSS_Text_Headers3 42 2,099 43 7,140 44 772 45 1,395 46 2,299 47 6,999
2 4,799 3 1,299 4 1,899 5 3,420 6 12,999 CSS_Text_Headers3 42 2,099 43 7,140 44 772 45 1,395 46 2,299 47 6,999
#
#DF_Main<-cbind(df,df3)
# View(DF_Main)
# Combine the tables offline instead: cbind() fails here (and knitr stops with an error) because df and df3 have different numbers of rows (46 vs 47).
# Show the current working directory for reference. When the script was
# originally run this printed:
# [1] "C:/STAT/RVest-2"
getwd()

# Export each scraped table as a CSV file into a single output directory.
# write.csv() is used rather than write.table(sep = ","): with row names
# enabled, write.table() writes a header that has no entry for the row-name
# column, leaving the header one field short of every data row. write.csv()
# adds the blank leading header cell that CSV readers and spreadsheets expect.
out_dir <- "C:/STAT/RVest-2"
write.csv(df, file.path(out_dir, "df.csv"))
write.csv(df1, file.path(out_dir, "df1.csv"))
write.csv(df2, file.path(out_dir, "df2.csv"))
write.csv(df3, file.path(out_dir, "df3.csv"))
# vendors
# Select every element that carries an "href" attribute. Unlike the sections
# above, which read the element text, this one reads the attribute value.
webElems4 <- remDr$findElements("css selector", "[href]")

# lapply() is used instead of sapply() so the intermediate result is always a
# list, matching the other sections of this script.
# The original run reported a traceback at this step; a failure on a single
# element is now surfaced as a warning and that element is skipped, rather
# than being ignored by hand.
CSS_HREF_Attr <- unlist(lapply(webElems4, function(x) {
  tryCatch(
    x$getElementAttribute("href"),
    error = function(e) {
      warning(
        "Could not read href attribute: ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}))

df4 <- as.data.frame(CSS_HREF_Attr)
head(df4)
tail(df4) # Success ... 560 URLs, including javascript:void(0); values ...
CSS_HREF_Attr
1 http://www.shopclues.com/mobiles/mobile-phones.html 2 android-app://com.shopclues/http/www.shopclues.com/mobiles/mobile-phones.html 3http://ads.yahoo.com/ 4 http://analytics.twitter.com/ 5 http://api.targetingmantra.com/ 6 http://api.apac-sg.zettata.com/ CSS_HREF_Attr 554http://www.shopclues.com/# 555 javascript:void(0); 556 http://www.shopclues.com/tools/fb_apps/fbaccess.php?auth=fb&page=%2Fmobiles%2Fmobile-phones.html 557 http://www.shopclues.com/tools/fb_apps/google_login/index.php?auth=google&page=%2Fmobiles%2Fmobile-phones.html 558 http://www.shopclues.com/index.php?dispatch=auth.recover_password 559javascript:void(0);
# Needs to be tackled offline
# R Code Ends here ...
Contact…
No comments:
Post a Comment