# Read the comma-separated file into a data frame; strings stay as character
housingprice <- read.table(
  file = path_to_csv_file,
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Inspect the top of the data: default row count first, then 8 rows
head(housingprice)
head(housingprice, n = 8)

# Inspect the bottom of the data: default row count first, then the last 8 rows
tail(housingprice)
tail(housingprice, n = 8)

# Assign column names to the data frame
names(housingprice) <- c(
  "TotalPrice",
  "ValuePerSqFt",
  "IncomePerSqFtOfNeighbourhood"
)
The following are the details of the commands.
# Load ggplot2. library() stops with an error if the package is missing,
# whereas require() only returns FALSE and lets the script fail later on.
library(ggplot2)

# A histogram of the response variable shows its frequency distribution and
# whether it looks like a normal distribution. For a linear regression model,
# the response variable should ideally be normally distributed.
ggplot(housingprice, aes(x = ValuePerSqFt)) +
  geom_histogram(binwidth = 10) +
  xlab("Value per Sq Ft")

# If the distribution is not normal (e.g. it has multiple modes), facet the
# histogram with facet_wrap(~variableName) for a more detailed view.
# varName stands in for the column to facet by.
ggplot(housingprice, aes(x = ValuePerSqFt)) +
  geom_histogram(binwidth = 10) +
  xlab("Value per Sq Ft") +
  facet_wrap(~varName)

# A scatterplot explores the relationship between the response and a
# predictor variable.
# NOTE(review): SqFt is assumed to be a column of housingprice - confirm it
# exists under that name before running.
ggplot(housingprice, aes(x = SqFt, y = ValuePerSqFt)) +
  geom_point()
# b is the slope, c is the intercept and e is the error term
y = bx + c + e
One could use the “lm” command, as follows, to fit the linear regression model:
# Fit a linear model with a single predictor
lm(formula = responseVariable ~ predictorVariable)
For a multiple regression model, the math is as follows:
# Formula for multiple regression: Y is the vector of the response variable, X is the matrix of the predictor variables, B is the vector of coefficients and e is the error term
Y = XB + e
One could use the “lm” command to fit the multiple regression model as well.
# Fit the multiple regression model and store it so that predict() can use it.
modelVar <- lm(
  responseVariable ~ predictorVar1 + predictorVar2 + predictorVar3
)

# Predict the response for the rows of dataFrameVar.
#   se.fit   - TRUE to also return standard errors, FALSE for predictions only
#   interval - "prediction" or "confidence" ("none" gives no bounds)
#   level    - level of the interval, a number between 0 and 1
predictVar <- predict(
  modelVar,
  newdata = dataFrameVar,
  se.fit = FALSE,
  interval = "prediction",
  level = 0.95
)
The above would print out the predicted values along with the lower and upper bound values. Based on the lower and upper bounds, one could decide whether or not the model is a good fit.
Artificial Intelligence (AI) agents have started becoming an integral part of our lives. Imagine asking…
In the ever-evolving landscape of agentic AI workflows and applications, understanding and leveraging design patterns…
In this blog, I aim to provide a comprehensive list of valuable resources for learning…
Have you ever wondered how systems determine whether to grant or deny access, and how…
What revolutionary technologies and industries will define the future of business in 2025? As we…
For data scientists and machine learning researchers, 2024 has been a landmark year in AI…