# DM (Data Mining) lab programs in R

# ---- Naive Bayes classification (e1071) on the iris dataset ----

# Installing Packages

install.packages("e1071")

install.packages("caTools")

install.packages("caret")


# Loading package

library(e1071)

library(caTools)

library(caret)


# Splitting data into train and test data.
# BUG FIX: sample.split() expects the vector of class labels, not the
# whole data frame. Passing `iris` recycled a split pattern across the
# 5 columns instead of producing one stratified TRUE/FALSE flag per row.
split <- sample.split(iris$Species, SplitRatio = 0.7)

# `split` is already logical, so compare against TRUE/FALSE (not "TRUE")
train_cl <- subset(iris, split == TRUE)
test_cl <- subset(iris, split == FALSE)

# Feature Scaling.
# BUG FIX: scale the test set with the TRAINING set's center and scale.
# Scaling test_cl independently leaks information from the test
# distribution and puts the two sets on different scales.
train_scale <- scale(train_cl[, 1:4])
test_scale <- scale(
  test_cl[, 1:4],
  center = attr(train_scale, "scaled:center"),
  scale = attr(train_scale, "scaled:scale")
)
# NOTE(review): the scaled matrices are never used below — the model is
# fit on the unscaled train_cl. Kept for parity with the original script.


# ---- Fit a Naive Bayes model to the training data ----
set.seed(120) # reproducibility
classifier_cl <- naiveBayes(Species ~ ., data = train_cl)
print(classifier_cl)

# Predict the class of each held-out test row
y_pred <- predict(classifier_cl, newdata = test_cl)

# Confusion matrix: rows = actual Species, columns = predicted
cm <- table(test_cl$Species, y_pred)
print(cm)

# Model evaluation: accuracy, kappa and per-class statistics (caret)
confusionMatrix(cm)


# ---- Decision Tree (rpart) on the iris dataset ----

#install packages

install.packages("rpart")

install.packages("rpart.plot")

install.packages("caret")


#library

library(rpart)

library(rpart.plot)

library(caret)


# Load iris and make a stratified 80/20 train/test split on Species
data(iris)
set.seed(123) # reproducible partition
in_train <- createDataPartition(iris$Species, p = 0.8, list = FALSE)
train_data <- iris[in_train, ]   # 80% of rows, stratified by class
test_data <- iris[-in_train, ]   # remaining 20%

# ---- Train the model ----

# Fit a classification tree: require at least 10 observations before
# attempting a split, and drop splits improving fit by less than cp = 0.01
ctrl <- rpart.control(minsplit = 10, cp = 0.01)
tree_model <- rpart(
  Species ~ .,
  data = train_data,
  method = "class", # For classification
  control = ctrl
)

# Visualize the tree with automatic palette and node numbers
rpart.plot(tree_model, box.palette = "auto", nn = TRUE)


# Predict class labels for the held-out rows and evaluate them
# against the true Species with a caret confusion matrix
predictions <- predict(tree_model, test_data, type = "class")
confusionMatrix(predictions, test_data$Species)



# ---- Prune the tree ----
# Choose the complexity parameter whose cross-validated error (xerror)
# is smallest, then prune back to that subtree.
printcp(tree_model)
cp_table <- tree_model$cptable
optimal_cp <- cp_table[which.min(cp_table[, "xerror"]), "CP"]
pruned_tree <- prune(tree_model, cp = optimal_cp)
rpart.plot(pruned_tree)


# ---- Hyperparameter tuning ----
# Grid-search cp over 0.01..0.10 with 10-fold cross-validation (caret)
control <- trainControl(method = "cv", number = 10)
cp_grid <- expand.grid(cp = seq(0.01, 0.1, 0.01))
tuned_tree <- train(
  Species ~ .,
  data = iris,
  method = "rpart",
  trControl = control,
  tuneGrid = cp_grid
)
print(tuned_tree)


# ---- Random Forest (randomForest) on the iris dataset ----


# Loading data: iris ships with base R (150 rows, 4 numeric
# measurements plus the Species factor)
data(iris) 

  

# Structure: confirm column types before modelling
str(iris) 


install.packages("caTools") # For sampling the dataset 

install.packages("randomForest")

# Loading package 

library(caTools) 

library(randomForest) 

  

# Splitting data into train and test sets.
# BUG FIX: sample.split() must receive the label vector (iris$Species),
# not the whole data frame, so that the 70/30 split is one stratified
# TRUE/FALSE flag per row rather than a pattern recycled over columns.
split <- sample.split(iris$Species, SplitRatio = 0.7)
split

# `split` is logical — compare against TRUE/FALSE, not the string "TRUE"
train <- subset(iris, split == TRUE)
test <- subset(iris, split == FALSE)

# Fitting a Random Forest to the train dataset.
# Style fix: use `<-` for assignment instead of `=`, matching the
# convention used elsewhere in this file.
set.seed(120) # reproducible forest
classifier_RF <- randomForest(
  x = train[-5],      # predictors: all columns except Species (col 5)
  y = train$Species,  # class labels
  ntree = 500         # number of trees in the ensemble
)

classifier_RF

# Predicting the test set results (drop the label column)
y_pred <- predict(classifier_RF, newdata = test[-5])

# Confusion matrix: rows = actual Species (column 5), columns = predicted
confusion_mtx <- table(test[, 5], y_pred)
confusion_mtx

  

# Plotting model: OOB error rate as a function of the number of trees
plot(classifier_RF) 

  

# Importance plot: numeric variable-importance measures per predictor
importance(classifier_RF) 

  

# Variable importance plot: dot chart of the measures above
varImpPlot(classifier_RF)


# ---- Hierarchical Clustering in R (mtcars dataset) ----


install.packages("dplyr")

library(dplyr)

# Peek at the first rows of the built-in mtcars data
head(mtcars)

# Pairwise Euclidean distances between the 32 cars
distance_mat <- dist(mtcars, method = 'euclidean')
distance_mat

# Average-linkage agglomerative clustering on that distance matrix.
# (hclust is deterministic; the seed is kept for script-level parity.)
set.seed(240) # Setting seed
Hierar_cl <- hclust(distance_mat, method = "average")
Hierar_cl

# Dendrogram of the merge sequence
plot(Hierar_cl)

 


# Draw a reference cut line at height 110 on the open dendrogram
abline(h = 110, col = "green")

# Cut the tree into 3 flat clusters; `fit` maps each car to a cluster id
fit <- cutree(Hierar_cl, k = 3)
fit

# Cluster sizes
table(fit)

# Outline the 3 clusters directly on the dendrogram
rect.hclust(Hierar_cl, k = 3, border = "green")


# ---- Apriori algorithm (arules) on the Groceries dataset ----


# Install and load the arules package.
# BUG FIX: the original ran install.packages("packages"), which installs
# an unrelated package literally named "packages"; arules is what the
# library() call below needs.
install.packages("arules")
library(arules)

# Groceries: built-in transaction data of market baskets
data(Groceries)
transactions <- Groceries
summary(transactions)

# Mine association rules with minimum support 1% and confidence 20%
rules <- apriori(Groceries, parameter = list(supp = 0.01, conf = 0.2))
inspect(rules[1:10])

# (Blog-page residue from the source the code was copied from —
#  commented out so the file parses as R:)
# Comments
# Popular posts from this blog
# RDBMS
# .Net
# 1-5