Posts

Showing posts from November, 2017
use yelp_db; /*SQL QUERY 1 */ sELECT count(DISTINCT(STATE)) FROM business; SELECT count(DISTINCT(CITY)) FROM business; 51*963 all possible combinations Existing combinations select distinct state,city from business; /* SQL QUERY2 */ SELECT distinct(name) ,review_count, is_open as is_active FROM business WHERE STATE ='ON' order by review_count desc limit 10; 10 ROWS SELECT distinct(name) ,review_count , is_open as is_active FROM business WHERE STATE ='ON'  order by review_count desc limit 10; /*SQL Query 3 */ select  id, text, (useful + funny + cool ) as  Review_Score from review limit 100; /*SQL Query 4 */ select  id, text, (useful + funny + cool ) as  Review_Score from review limit 10000; /*SQL Query 5 */ select character_length(text) from review; SELECT POSITION("X" IN "Harry Potter and the Order of the Phoenix") AS MatchPosition;
use yelp_db; sELECT count(DISTINCT(STATE)) FROM business; SELECT count(DISTINCT(CITY)) FROM business; 51*963 all possible combinations Existing combinations select distinct state,city from business; /* SQL QUERY2 */ SELECT distinct(name) ,review_count, is_open as is_active FROM business WHERE STATE ='ON' order by review_count desc limit 10; 10 ROWS SELECT distinct(name) ,review_count , is_open as is_active FROM business WHERE STATE ='ON'  order by review_count desc limit 10; /*SQL Query 3 */ select  id, text, (useful + funny + cool ) as  Review_Score from review limit 100; /*SQL Query 4 */ select  id, text, (useful + funny + cool ) as  Review_Score from review limit 10000; /*SQL Query 5 */ select character_length(text) from review;   SELECT POSITION("X" IN "Harry Potter and the Order of the Phoenix") AS MatchPosition;         ...
use yelp_db; /* SQL Query 1 */ select ID, NAME,STATE ,count(state) from business group by STATE ORDER BY count(state) DESC; 51 Rows /*SQL Query 2 */ select STATE,count(state),AVG(review_count),AVG(stars) from business group by 1 order by 2 DESC ; 51 Rows /*SQL Query 3 */ select STATE,count(state),AVG(review_count),AVG(stars) from business group by 1 having count(state)>='1000' order by 2 DESC ; 11 Rows /*SQL Query 4 */ select state,avg(stars), avg(review_count) from business group by state order by count(state) ; 51 Rows /*SQL Query 5 */ select state,avg(stars), avg(review_count) from business GROUP BY state having  avg(stars) >= 3 order by count(state);  50 Rows ;         order by count(state) ; select * FROM business where state ='ABE'; select * from business; Group by STARS;
SELECT * FROM yelp_db.category; businesss - review count category- restaurant; use yelp_db; select * from category c , business b where (c.category = 'Restaurants'  AND b.id = c.business_id and b.review_count >1000); select b.name,b.city,b.state,b.review_count,c.category from category c , business b where (c.category = 'Restaurants'  AND b.id = c.business_id ) group  by b.state order by b.review_count; select b.name,b.city,b.state,b.review_count,c.category from category c , business b where (c.category = 'Restaurants'  AND b.id = c.business_id ) group  by b.city order by b.review_count; select b.id from review r, business b, category c where r.date > '2017-05-01 00:00:00' and b.id =r.id;

twitter

library(twitteR) library(httr) chennai = searchTwitter("#chennai", n=50) chennai[[1]]$text f=function(k){ for(i in 1:50) {   if(chennai[[i]]$isRetweet)     {k=k+1} } return(k)} f(0) if(chennai[[10]]$isRetweet==T){print(TRUE)} else {print(FALSE)} vec=c(1,2,3) sum_vec=function(vec){   sum=0   for(i in 1:length(vec))   {     sum=sum+vec[i]   }   return(sum)} sum_vec(vec) library(RMySQL) con=dbConnect(MySQL(),user="itlab",password="itlab@321", host="10.21.33.3",dbname="yelp_db") business_data = dbGetQuery(conn = con, statement = "select * from business") class(business_data) city_count= dbGetQuery(conn = con, statement = "select count(*),state,city from business  group by state,city order by count(*) desc") city_count=aggregate(x=business_data$id, by=list(business_data$state,business_data$city),FUN =length) ordered_city_count=city_count[order(city_count$x,city_count$Group.2, decr...

list

#create a numeric vector vector1<-rep(1,10) vector1  #[1] 1 1 1 1 1 1 1 1 1 1 #create a character vector - 'letters' and 'LETTERS' are predefined in R vector2<-letters[1:7] vector2 #[1] "a" "b" "c" "d" "e" #create list of two vectors L1=list(vector1,vector2) #Accessing elements of a list L1[[1]] # [1] 1 1 1 1 1 1 1 1 1 1 L1[[2]] #[1] "a" "b" "c" "d" "e" #Creating list dynamically L2<-list();L2 #list() L2[[1]]<-vector1; L2[[1]] # [1] 1 1 1 1 1 1 1 1 1 1 L2[[2]]<-vector2; L2[[1]] # [1] 1 1 1 1 1 1 1 1 1 1 L2[[2]] #[1] "a" "b" "c" "d" "e" #accessing an element in the list L2[[2]] #[1] "a" "b" "c" "d" "e" L2[[2]][2] #[1] "b" #creating list with names of elements L3=list(vector1=vector1,vector2=vector2) L3$vec...

data

x<-matrix(1:10,2,5) x #     [,1] [,2] [,3] [,4] [,5] # [1,]    1    3    5    7    9 # [2,]    2    4    6    8   10 y<-data.frame(x) y #  X1 X2 X3 X4 X5 #1  1  3  5  7  9 # 2  2  4  6  8 10 #check class of data frame class(y) #[1] "data.frame" #check column names names(y) #[1] "X1" "X2" "X3" "X4" "X5" #update column names names(y)[1] #[1] "X1" names(y)[1]<-"column1" names(y) #[1] "column1" "X2"      "X3"      "X4"      "X5" #creating data frame from columns zz<-data.frame(col1=rep(1,10),col2=rep(10,10)) zz #   col1 col2 #1     1   10 #2     1   10 #3     1   10 #4     1   10 #5     1   10 #6  ...

matrix

#converting vector to matrix x<-seq(1,10,1) x # [1]  1  2  3  4  5  6  7  8  9 10 dim(x) #NULL dim(x)<-c(2,5) x #     [,1] [,2] [,3] [,4] [,5] # [1,]    1    3    5    7    9 # [2,]    2    4    6    8   10 dim(x)<-NULL x # [1]  1  2  3  4  5  6  7  8  9 10 y<-matrix(x,2,5) y #     [,1] [,2] [,3] [,4] [,5] # [1,]    1    3    5    7    9 # [2,]    2    4    6    8   10 y<-matrix(x,2,5,byrow = T) y #     [,1] [,2] [,3] [,4] [,5] # [1,]    1    2    3    4    5 # [2,]    6    7    8    9   10 z<-rbind(y,c(11,12...

vector

#Objects in R #Single values are treated as vectors 5 "Hello" TRUE #creating a variable with single value x=5 # a single variable is stored as a vector of length 1 x x[1] #check class of an Object using class() function class(x) numericVector<-rep(1,10) numericVector  #[1] 1 1 1 1 1 1 1 1 1 1 numericVector<-rep(1,10);numericVector (numericVector<-rep(1,10)) class(numericVector) #[1] "numeric" mode(numericVector) #Creating numeric vector using c() function numericVector<-c(1,2,3) numericVector #[1] 1 2 3 #Creating a character vector using c() function characterVector<-c("abc","def","xyz") characterVector #[1] "abc" "def" "xyz" #Creating a logical vector logicalVecor<-c(TRUE,FALSE,FALSE) logicalVecor #[1]  TRUE FALSE FALSE logicalVecor1<-c(T,F,F) logicalVecor1 #[1]  TRUE FALSE FALSE #count number of TRUE in a logical vector sum(logicalVec...
You have been provided with business.RData file from yelp_db. Open this in the R studio. (You should see the data element imported in Global Environment Window.) Address the following queries using R coding. Write all codes in an R script file and submit only as an individual assignment. Q1. (a) View the dataset to know what is stored in the data element. What is the R code executed in the command window? (b) Which data structure is used to store the data? (Is it vector/matrix/list/data.frame?) Find the names of the columns of your data? (c) is_open column in the data element represents whether the business is open or not. Find out how many businesses are open and how many are closed. (Use table() function to do that) table(business$is_open)  (d) How many different attributes are there in attribute column ?names(business$attributes) ncol(business$attributes)  (e) Find the average stars and average review count for all the businesses. Q2. We want to subset the ...
 x=10  if(x==1)  {    print(x)  }else if(x==10)  {    print("ten")  }else  {    print("nothing")  } #[1] "ten"    ifelse(x<5,"yes","no") #[1] "no"  ifelse(x<5,0,1) #[1] 1   # One interesting issue with R when dealing with "else" statement. # Let's try two code snippets to understand this issue # code 1 x=10 if(x==10) {   print(x) }else {   print('wrong') } # Output #  x=10 # > if(x==10) # + { # +   print(x) # + } # [1] 10 # > else # Error: unexpected 'else' in "else" # > { # +   print('wrong') # + } # [1] "wrong" # Code 2  x=10  if(x==10)  {    print(x)  }else  {    print('wrong')  } #[1] 10 # As you can see that in code 1, the else statement starts after }, in this case, R don't know if you have else block or not, hence, an error.
#create a new function mySum<-function(x,y)  {   return(x+y) } mySum(3,4) #[1] 7 #get body of a function - just type the name of the function mySum # function(x,y) # { #   return(x+y) # } #special function to apply any function to multiple rows and columns at once x<-data.frame(x1=seq(1,10,1),x2=2,x3=5) x #    x1 x2 x3 # 1   1  2  5 # 2   2  2  5 # 3   3  2  5 # 4   4  2  5 # 5   5  2  5 # 6   6  2  5 # 7   7  2  5 # 8   8  2  5 # 9   9  2  5 # 10 10  2  5 #apply function by column apply(x,2,mean) #  x1  x2  x3 # 5.5 2.0 5.0 #apply function by row apply(x,1,mean) # [1] 2.666667 3.000000 3.333333 3.666667 4.000000 4.333333 4.666667 5.000000 5.333333 5.666667 age=seq(11,20,1) age #[1] 11 12 13 14 15 16...

twitter

library('twitteR') library('httr') #go to https://apps.twitter.com/ and register a new app. #click on app #click on keys and access tokens tab # you will get following credentials from twitter #consumer key #consumer secret #Access token #access token secret #setup OAuth setup_twitter_oauth(consumer_key="*****",                     consumer_secret="******",                     access_token="****",                     access_secret="*****") #Search for tweets containing hashtag #chennai chennai = searchTwitter("#chennai", n=5000) ### find how many tweets are retweets of other tweets

conditional

#special function to apply any function to multiple rows and columns at once x<-data.frame(x1=seq(1,10,1),x2=2,x3=5) x #    x1 x2 x3 # 1   1  2  5 # 2   2  2  5 # 3   3  2  5 # 4   4  2  5 # 5   5  2  5 # 6   6  2  5 # 7   7  2  5 # 8   8  2  5 # 9   9  2  5 # 10 10  2  5 #apply function by column apply(x,2,mean) #  x1  x2  x3 # 5.5 2.0 5.0 #apply function by row apply(x,1,mean) # [1] 2.666667 3.000000 3.333333 3.666667 4.000000 4.333333 4.666667 5.000000 5.333333 5.666667 age=seq(11,20,1) age #[1] 11 12 13 14 15 16 17 18 19 20 m_name=letters[1:10] m_name # [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" rbind(m_name,age) #        [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] # m_name "a"  "b"  "c"  "d...
######################### ### connect to MySQL to fetch data ## load the required library library(RMySQL) #### create the connection con <- dbConnect(MySQL(),                  user = 'username',                  password = 'password',                  host = 'host address',                  dbname='database name') ## write the mysql query to fetch the required data data = dbGetQuery(conn = con, statement = "your MySQL statement") ### close the Mysql connection dbDisconnect(con) con=dbConnect(MySQL(),user="itlab",password="itlab@321", host="10.21.33.3",dbname="yelp_db") business_data = dbGetQuery(conn = con, statement = "select * from business") class(business_data)