# Load the housing data from a CSV file.
# read.csv() is read.table() preconfigured for CSV input: comma separator,
# header row, only the double quote treated as a quoting character, and no
# comment character. With plain read.table(sep = ",") an apostrophe or a "#"
# inside a field would silently corrupt the parse.
housingprice <- read.csv(path_to_csv_file, stringsAsFactors = FALSE)

# Preview the first rows: 6 by default, or n rows when n is supplied.
head(housingprice)
head(housingprice, n = 8)

# Preview the last rows: 6 by default, or n rows when n is supplied.
tail(housingprice)
tail(housingprice, n = 8)

# Rename the columns.
# NOTE(review): assumes the file has exactly these three columns, in this
# order -- confirm against the actual CSV.
names(housingprice) <- c(
  "TotalPrice",
  "ValuePerSqFt",
  "IncomePerSqFtOfNeighbourhood"
)
Following are the details related to the commands.
# Load ggplot2. library() stops with an error when the package is missing,
# whereas require() only returns FALSE and lets the plotting calls below fail
# later with a less helpful message.
library(ggplot2)

# Histogram of the response variable, to inspect its frequency distribution
# and judge whether it looks roughly normal before fitting a linear model.
ggplot(housingprice, aes(x = ValuePerSqFt)) +
  geom_histogram(binwidth = 10) +
  xlab("Value per Sq Ft")

# If the distribution is not normal (for example, it has several modes),
# facet the histogram by another variable for a more detailed view.
# `varName` is a placeholder: replace it with a column of `housingprice`.
ggplot(housingprice, aes(x = ValuePerSqFt)) +
  geom_histogram(binwidth = 10) +
  xlab("Value per Sq Ft") +
  facet_wrap(~varName)

# Scatterplot to explore the relationship between the response and a
# predictor. The predictor must be one of the columns named above; the
# previous `SqFt` mapping referred to a column that does not exist in
# `housingprice` after the rename.
ggplot(
  housingprice,
  aes(x = IncomePerSqFtOfNeighbourhood, y = ValuePerSqFt)
) +
  geom_point()
# b is the slope, c is the intercept and e is the error term
y = bx + c + e
One could use the “lm” command, as follows, to fit the linear regression model:
# Simple linear regression: model the response on a single predictor.
# The variable names here appear to be placeholders for the reader's data.
lm(responseVariable ~ predictorVariable)
For the multiple regression model, the math is as follows:
# Formula for multiple regression: Y represents the vector of the response variable, X represents the matrix of predictor variables, B represents the vector of coefficients and e represents the vector of errors
Y = XB + e
One could use the “lm” command to fit the multiple regression model as well.
# Multiple linear regression: model the response on several predictors,
# joined with `+` on the right-hand side of the formula.
# The variable names here appear to be placeholders for the reader's data.
lm(
  responseVariable ~ predictorVar1 + predictorVar2 + predictorVar3
)
# Predict the response for new observations from a fitted model.
#   modelVar     - fitted model object (e.g. the result of lm()).
#   dataFrameVar - data frame holding the predictor columns to predict for.
#   se.fit       - TRUE to also return standard errors, FALSE to omit them.
#   interval     - "prediction" or "confidence" (lower case), or "none".
#   level        - confidence level strictly between 0 and 1, e.g. 0.95.
# The previous template used `level=0_1`, which is not valid R syntax, and an
# interval string that predict() does not accept.
# With se.fit = TRUE the result is a list; its `fit` element holds the
# columns fit, lwr and upr (prediction with lower and upper bounds).
predictVar <- predict(
  modelVar,
  newdata = dataFrameVar,
  se.fit = TRUE,
  interval = "prediction",
  level = 0.95
)
The above would give the predicted values along with their lower and upper bounds. Based on the lower and upper bounds, one could decide whether the model is a good fit or not.
Large language models (LLMs) have fundamentally transformed our digital landscape, powering everything from chatbots and…
As Large Language Models (LLMs) evolve into autonomous agents, understanding agentic workflow design patterns has…
In today's data-driven business landscape, organizations are constantly seeking ways to harness the power of…
In this blog, you would get to know the essential mathematical topics you need to…
This blog represents a list of questions you can ask when thinking like a product…
AI agents are autonomous systems combining three core components: a reasoning engine (powered by LLM),…