housingprice = read.table( path_to_csv_file, sep=",", header=TRUE, stringsAsFactors=FALSE )
# Default head command. This will display 6 rows
head( housingprice )
# Head command with an explicit n; this will display the first 8 rows
head( housingprice, n=8)
# Default tail command displaying the last 6 rows
tail( housingprice )
# Tail command displaying the last n rows
tail( housingprice, n=8 )
names(housingprice) <- c("TotalPrice", "ValuePerSqFt", "IncomePerSqFtOfNeighbourhood")
The following are the details related to these commands.
# Load the ggplot2
require( ggplot2 )
# One could use a histogram of the response variable to check its frequency distribution and see whether it is approximately normal. For a linear regression model, the response variable should ideally be normally distributed.
ggplot( housingprice, aes(x=ValuePerSqFt)) + geom_histogram(binwidth=10) + xlab( "Value per Sq Ft")
# If the distribution is not normal, or shows multiple modes, one could use facet_wrap(~varName) to break the histogram out by a grouping variable for a more detailed view
ggplot( housingprice, aes(x=ValuePerSqFt)) + geom_histogram(binwidth=10) + xlab( "Value per Sq Ft") + facet_wrap(~varName)
# Use scatterplot for exploring relationship between response and predictor variables
ggplot( housingprice, aes(x=SqFt, y=ValuePerSqFt)) + geom_point()
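To preview the linear relationship directly on the scatterplot, one could also overlay a fitted line; a small sketch using ggplot2's geom_smooth:
# Overlay an ordinary least squares fit line on the scatterplot
ggplot( housingprice, aes(x=SqFt, y=ValuePerSqFt)) + geom_point() + geom_smooth(method="lm")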
# Simple linear regression: b is the slope, c is the intercept, and e is the error term
y = bx + c + e
One could use the lm() command, as shown below, to fit the linear regression model:
lm( responseVariable ~ predictorVariable, data=dataFrame )
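As an illustrative sketch (assuming the data frame contains a SqFt column, as used in the scatterplot above), one could fit value per square foot against square footage:
# Fit a simple linear regression of value per sq ft on square footage (SqFt assumed to exist)
fit1 <- lm( ValuePerSqFt ~ SqFt, data=housingprice )
# Show coefficients, standard errors and R-squared
summary( fit1 )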
For a multiple regression model, the math is as follows:
# Formula for multiple regression; Y represents the vector of response values, X represents the matrix of predictor variables, B represents the vector of coefficients, and e is the error term
Y = XB + e
One could use the lm() command to fit the multiple regression model as well.
lm( responseVariable ~ predictorVar1 + predictorVar2 + predictorVar3, data=dataFrame )
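For example, a hedged sketch using the column names assigned earlier (IncomePerSqFtOfNeighbourhood) together with the assumed SqFt column could be:
# Multiple regression with two predictors (column names as assumed above)
fit2 <- lm( ValuePerSqFt ~ SqFt + IncomePerSqFtOfNeighbourhood, data=housingprice )
summary( fit2 )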
# se.fit takes TRUE or FALSE; interval takes "prediction" or "confidence"; level is a value between 0 and 1
predictVar <- predict( modelVar, newdata=dataFrameVar, se.fit=TRUE, interval="prediction", level=0.95 )
The above returns the predicted values along with lower and upper bounds. Based on how well the observed values fall within these bounds, one could judge whether the model is a good fit.
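A minimal usage sketch, assuming the fit2 model from above and a hypothetical data frame newHousing holding the same predictor columns:
# Predictions with 95% prediction intervals for new data (newHousing is hypothetical)
preds <- predict( fit2, newdata=newHousing, se.fit=TRUE, interval="prediction", level=0.95 )
# preds$fit is a matrix with fit, lwr and upr columns
head( preds$fit )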