use yelp_db; /*SQL QUERY 1 */ sELECT count(DISTINCT(STATE)) FROM business; SELECT count(DISTINCT(CITY)) FROM business; 51*963 all possible combinations Existing combinations select distinct state,city from business; /* SQL QUERY2 */ SELECT distinct(name) ,review_count, is_open as is_active FROM business WHERE STATE ='ON' order by review_count desc limit 10; 10 ROWS SELECT distinct(name) ,review_count , is_open as is_active FROM business WHERE STATE ='ON' order by review_count desc limit 10; /*SQL Query 3 */ select id, text, (useful + funny + cool ) as Review_Score from review limit 100; /*SQL Query 4 */ select id, text, (useful + funny + cool ) as Review_Score from review limit 10000; /*SQL Query 5 */ select character_length(text) from review; SELECT POSITION("X" IN "Harry Potter and the Order of the Phoenix") AS MatchPosition;
Posts
Showing posts from November, 2017
- Get link
- X
- Other Apps
use yelp_db; sELECT count(DISTINCT(STATE)) FROM business; SELECT count(DISTINCT(CITY)) FROM business; 51*963 all possible combinations Existing combinations select distinct state,city from business; /* SQL QUERY2 */ SELECT distinct(name) ,review_count, is_open as is_active FROM business WHERE STATE ='ON' order by review_count desc limit 10; 10 ROWS SELECT distinct(name) ,review_count , is_open as is_active FROM business WHERE STATE ='ON' order by review_count desc limit 10; /*SQL Query 3 */ select id, text, (useful + funny + cool ) as Review_Score from review limit 100; /*SQL Query 4 */ select id, text, (useful + funny + cool ) as Review_Score from review limit 10000; /*SQL Query 5 */ select character_length(text) from review; SELECT POSITION("X" IN "Harry Potter and the Order of the Phoenix") AS MatchPosition; ...
- Get link
- X
- Other Apps
use yelp_db; /* SQL Query 1 */ select ID, NAME,STATE ,count(state) from business group by STATE ORDER BY count(state) DESC; 51 Rows /*SQL Query 2 */ select STATE,count(state),AVG(review_count),AVG(stars) from business group by 1 order by 2 DESC ; 51 Rows /*SQL Query 3 */ select STATE,count(state),AVG(review_count),AVG(stars) from business group by 1 having count(state)>='1000' order by 2 DESC ; 11 Rows /*SQL Query 4 */ select state,avg(stars), avg(review_count) from business group by state order by count(state) ; 51 Rows /*SQL Query 5 */ select state,avg(stars), avg(review_count) from business GROUP BY state having avg(stars) >= 3 order by count(state); 50 Rows ; order by count(state) ; select * FROM business where state ='ABE'; select * from business; Group by STARS;
- Get link
- X
- Other Apps
SELECT * FROM yelp_db.category; businesss - review count category- restaurant; use yelp_db; select * from category c , business b where (c.category = 'Restaurants' AND b.id = c.business_id and b.review_count >1000); select b.name,b.city,b.state,b.review_count,c.category from category c , business b where (c.category = 'Restaurants' AND b.id = c.business_id ) group by b.state order by b.review_count; select b.name,b.city,b.state,b.review_count,c.category from category c , business b where (c.category = 'Restaurants' AND b.id = c.business_id ) group by b.city order by b.review_count; select b.id from review r, business b, category c where r.date > '2017-05-01 00:00:00' and b.id =r.id;
- Get link
- X
- Other Apps
library(twitteR) library(httr) chennai = searchTwitter("#chennai", n=50) chennai[[1]]$text f=function(k){ for(i in 1:50) { if(chennai[[i]]$isRetweet) {k=k+1} } return(k)} f(0) if(chennai[[10]]$isRetweet==T){print(TRUE)} else {print(FALSE)} vec=c(1,2,3) sum_vec=function(vec){ sum=0 for(i in 1:length(vec)) { sum=sum+vec[i] } return(sum)} sum_vec(vec) library(RMySQL) con=dbConnect(MySQL(),user="itlab",password="itlab@321", host="10.21.33.3",dbname="yelp_db") business_data = dbGetQuery(conn = con, statement = "select * from business") class(business_data) city_count= dbGetQuery(conn = con, statement = "select count(*),state,city from business group by state,city order by count(*) desc") city_count=aggregate(x=business_data$id, by=list(business_data$state,business_data$city),FUN =length) ordered_city_count=city_count[order(city_count$x,city_count$Group.2, decr...
list
- Get link
- X
- Other Apps
#create a numeric vector vector1<-rep(1,10) vector1 #[1] 1 1 1 1 1 1 1 1 1 1 #create a character vector - 'letters' and 'LETTERS' are predefined in R vector2<-letters[1:7] vector2 #[1] "a" "b" "c" "d" "e" #create list of two vectors L1=list(vector1,vector2) #Accessing elements of a list L1[[1]] # [1] 1 1 1 1 1 1 1 1 1 1 L1[[2]] #[1] "a" "b" "c" "d" "e" #Creating list dynamically L2<-list();L2 #list() L2[[1]]<-vector1; L2[[1]] # [1] 1 1 1 1 1 1 1 1 1 1 L2[[2]]<-vector2; L2[[1]] # [1] 1 1 1 1 1 1 1 1 1 1 L2[[2]] #[1] "a" "b" "c" "d" "e" #accessing an element in the list L2[[2]] #[1] "a" "b" "c" "d" "e" L2[[2]][2] #[1] "b" #creating list with names of elements L3=list(vector1=vector1,vector2=vector2) L3$vec...
data
- Get link
- X
- Other Apps
x<-matrix(1:10,2,5) x # [,1] [,2] [,3] [,4] [,5] # [1,] 1 3 5 7 9 # [2,] 2 4 6 8 10 y<-data.frame(x) y # X1 X2 X3 X4 X5 #1 1 3 5 7 9 # 2 2 4 6 8 10 #check class of data frame class(y) #[1] "data.frame" #check column names names(y) #[1] "X1" "X2" "X3" "X4" "X5" #update column names names(y)[1] #[1] "X1" names(y)[1]<-"column1" names(y) #[1] "column1" "X2" "X3" "X4" "X5" #creating data frame from columns zz<-data.frame(col1=rep(1,10),col2=rep(10,10)) zz # col1 col2 #1 1 10 #2 1 10 #3 1 10 #4 1 10 #5 1 10 #6 ...
matrix
- Get link
- X
- Other Apps
#converting vector to matrix x<-seq(1,10,1) x # [1] 1 2 3 4 5 6 7 8 9 10 dim(x) #NULL dim(x)<-c(2,5) x # [,1] [,2] [,3] [,4] [,5] # [1,] 1 3 5 7 9 # [2,] 2 4 6 8 10 dim(x)<-NULL x # [1] 1 2 3 4 5 6 7 8 9 10 y<-matrix(x,2,5) y # [,1] [,2] [,3] [,4] [,5] # [1,] 1 3 5 7 9 # [2,] 2 4 6 8 10 y<-matrix(x,2,5,byrow = T) y # [,1] [,2] [,3] [,4] [,5] # [1,] 1 2 3 4 5 # [2,] 6 7 8 9 10 z<-rbind(y,c(11,12...
vector
- Get link
- X
- Other Apps
#Objects in R #Single values are treated as vectors 5 "Hello" TRUE #creating a variable with single value x=5 # a single variable is stored as a vector of length 1 x x[1] #check class of an Object using class() function class(x) numericVector<-rep(1,10) numericVector #[1] 1 1 1 1 1 1 1 1 1 1 numericVector<-rep(1,10);numericVector (numericVector<-rep(1,10)) class(numericVector) #[1] "numeric" mode(numericVector) #Creating numeric vector using c() function numericVector<-c(1,2,3) numericVector #[1] 1 2 3 #Creating a character vector using c() function characterVector<-c("abc","def","xyz") characterVector #[1] "abc" "def" "xyz" #Creating a logical vector logicalVecor<-c(TRUE,FALSE,FALSE) logicalVecor #[1] TRUE FALSE FALSE logicalVecor1<-c(T,F,F) logicalVecor1 #[1] TRUE FALSE FALSE #count number of TRUE in a logical vector sum(logicalVec...
- Get link
- X
- Other Apps
You have been provided with business.RData file from yelp_db. Open this in the R studio. (You should see the data element imported in Global Environment Window.) Address the following queries using R coding. Write all codes in an R script file and submit only as an individual assignment. Q1. (a) View the dataset to know what is stored in the data element. What is the R code executed in the command window? (b) Which data structure is used to store the data? (Is it vector/matrix/list/data.frame?) Find the names of the columns of your data? (c) is_open column in the data element represents whether the business is open or not. Find out how many businesses are open and how many are closed. (Use table() function to do that) table(business$is_open) (d) How many different attributes are there in attribute column ?names(business$attributes) ncol(business$attributes) (e) Find the average stars and average review count for all the businesses. Q2. We want to subset the ...
- Get link
- X
- Other Apps
x=10 if(x==1) { print(x) }else if(x==10) { print("ten") }else { print("nothing") } #[1] "ten" ifelse(x<5,"yes","no") #[1] "no" ifelse(x<5,0,1) #[1] 1 # One interesting issue with R when dealing with "else" statement. # Let's try two code snippets to understand this issue # code 1 x=10 if(x==10) { print(x) }else { print('wrong') } # Output # x=10 # > if(x==10) # + { # + print(x) # + } # [1] 10 # > else # Error: unexpected 'else' in "else" # > { # + print('wrong') # + } # [1] "wrong" # Code 2 x=10 if(x==10) { print(x) }else { print('wrong') } #[1] 10 # As you can see that in code 1, the else statement starts after }, in this case, R don't know if you have else block or not, hence, an error.
- Get link
- X
- Other Apps
#create a new function mySum<-function(x,y) { return(x+y) } mySum(3,4) #[1] 7 #get body of a function - just type the name of the function mySum # function(x,y) # { # return(x+y) # } #special function to apply any function to multiple rows and columns at once x<-data.frame(x1=seq(1,10,1),x2=2,x3=5) x # x1 x2 x3 # 1 1 2 5 # 2 2 2 5 # 3 3 2 5 # 4 4 2 5 # 5 5 2 5 # 6 6 2 5 # 7 7 2 5 # 8 8 2 5 # 9 9 2 5 # 10 10 2 5 #apply function by column apply(x,2,mean) # x1 x2 x3 # 5.5 2.0 5.0 #apply function by row apply(x,1,mean) # [1] 2.666667 3.000000 3.333333 3.666667 4.000000 4.333333 4.666667 5.000000 5.333333 5.666667 age=seq(11,20,1) age #[1] 11 12 13 14 15 16...
- Get link
- X
- Other Apps
library('twitteR') library('httr') #go to https://apps.twitter.com/ and register a new app. #click on app #click on keys and access tokens tab # you will get following credentials from twitter #consumer key #consumer secret #Access token #access token secret #setup OAuth setup_twitter_oauth(consumer_key="*****", consumer_secret="******", access_token="****", access_secret="*****") #Search for tweets containing hashtag #chennai chennai = searchTwitter("#chennai", n=5000) ### find how many tweets are retweets of other tweets
conditional
- Get link
- X
- Other Apps
#special function to apply any function to multiple rows and columns at once x<-data.frame(x1=seq(1,10,1),x2=2,x3=5) x # x1 x2 x3 # 1 1 2 5 # 2 2 2 5 # 3 3 2 5 # 4 4 2 5 # 5 5 2 5 # 6 6 2 5 # 7 7 2 5 # 8 8 2 5 # 9 9 2 5 # 10 10 2 5 #apply function by column apply(x,2,mean) # x1 x2 x3 # 5.5 2.0 5.0 #apply function by row apply(x,1,mean) # [1] 2.666667 3.000000 3.333333 3.666667 4.000000 4.333333 4.666667 5.000000 5.333333 5.666667 age=seq(11,20,1) age #[1] 11 12 13 14 15 16 17 18 19 20 m_name=letters[1:10] m_name # [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" rbind(m_name,age) # [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] # m_name "a" "b" "c" "d...
- Get link
- X
- Other Apps
######################### ### connect to MySQL to fetch data ## load the required library library(RMySQL) #### create the connection con <- dbConnect(MySQL(), user = 'username', password = 'password', host = 'host address', dbname='database name') ## write the mysql query to fetch the required data data = dbGetQuery(conn = con, statement = "your MySQL statement") ### close the Mysql connection dbDisconnect(con) con=dbConnect(MySQL(),user="itlab",password="itlab@321", host="10.21.33.3",dbname="yelp_db") business_data = dbGetQuery(conn = con, statement = "select * from business") class(business_data)