# HOMEWORK WEEK 2
# 2023-01-23
#
# Question 3.1 ----

# options("scipen" = 100, "digits" = 10)
# install.packages("rmarkdown")

# Load the data set into a data frame.
# NOTE(review): R only expands "~" when it is the first character of a path,
# so the "~" inside this path is taken literally -- confirm the real location
# of the file before running.
path2 <- file.path("/Users/~/Documents/ISYE 6501/Homework/Week 1/data 2.2/credit_card_data.txt")
data2 <- read.table(path2, header = FALSE)

# Check the size of the data set in credit_card_data.txt.
print(dim(data2))
## [1] 654  11

head(data2)
##   V1    V2    V3   V4 V5 V6 V7 V8  V9 V10 V11
## 1  1 30.83 0.000 1.25  1  0  1  1 202   0   1
## 2  0 58.67 4.460 3.04  1  0  6  1  43 560   1
## 3  0 24.50 0.500 1.50  1  1  0  1 280 824   1
## 4  1 27.83 1.540 3.75  1  0  5  0 100   3   1
## 5  1 20.17 5.625 1.71  1  1  0  1 120   0   1
## 6  1 32.08 4.000 2.50  1  1  0  0 360   0   1

# Question 3.1.a: Train KNN with k-fold (cv.kknn) cross-validation ----

# Uses credit_card_data.txt.
# install.packages("kknn")
# Ref. document:
# https://www.rdocumentation.org/packages/kknn/versions/1.3.1/topics/train.kknn

# read.table() already returns a data frame, so no conversion is needed; the
# name `data` is kept because the modelling code below refers to it.
data <- data2

library(kknn)

# Fix the RNG state so the random fold assignment is reproducible.
set.seed(123)

# Largest neighbourhood size to evaluate; K runs over 1..max_k.
max_k <- 50L

# Preallocate one accuracy slot per K instead of growing the vector with c().
knn_model_cv_acc_vector <- numeric(max_k)

# Loop through 50 values of K to check the accuracy of the KNN model trained
# with 10-fold cross-validation.
for (K in seq_len(max_k)) {
  knn_model_cv <- cv.kknn(
    V11 ~ .,
    data,
    kcv = 10,
    k = K,
    kernel = "optimal",
    scale = TRUE
  )

  # The first list element holds the per-row results; its second column is
  # used as the fitted response (presumably observed/predicted columns --
  # confirm against the kknn documentation).
  knn_model_cv_result <- data.frame(knn_model_cv[1])

  # The fitted values are continuous, so round them to a 0/1 class label.
  knn_pred <- round(knn_model_cv_result[, 2])

  # Accuracy = share of rows whose rounded prediction equals the V11 response.
  knn_model_cv_acc <- sum(knn_pred == data[, 11]) / nrow(data)
  knn_model_cv_acc_vector[K] <- knn_model_cv_acc
}

knn_model_cv_acc_vector
##  [1] 0.8119266 0.8073394 0.8134557 0.8103976 0.8547401 0.8455657 0.8455657
##  [8] 0.8425076 0.8425076 0.8409786 0.8455657 0.8470948 0.8440367 0.8547401
## [15] 0.8516820 0.8470948 0.8440367 0.8425076 0.8455657 0.8516820 0.8425076
## [22] 0.8425076 0.8425076 0.8394495 0.8440367 0.8425076 0.8425076 0.8425076
## [29] 0.8363914 0.8394495 0.8440367 0.8425076 0.8455657 0.8363914 0.8348624
## [36] 0.8440367 0.8302752 0.8348624 0.8302752 0.8287462 0.8409786 0.8333333
## [43] 0.8440367 0.8501529 0.8409786 0.8363914 0.8318043 0.8455657 0.8379205
## [50] 0.8409786

# Look for the best accuracy and the respective K values.
max(knn_model_cv_acc_vector)
## [1] 0.8547401

# which(x == max(x)) rather than which.max(x), so that every tied K is listed.
which(knn_model_cv_acc_vector == max(knn_model_cv_acc_vector))
## [1]  5 14

# Look for the worst accuracy and the respective K values.
min(knn_model_cv_acc_vector)
## [1] 0.8073394

which(knn_model_cv_acc_vector == min(knn_model_cv_acc_vector))
## [1] 2

# As shown above, with the credit_card_data.txt data set and 10-fold
# cross-validation, the proposed KNN model works best with K of 5 or 14, with
# an accuracy of about 85.47%, while the small K value of 2 gives the model's
# minimum accuracy of about 80.73%.
Expert's Answer
Chat with our Experts
Want to contact us directly? No Problem. We are always here for you
Your future, our responsibility. Submit your task on time.
Order Now - Get Online
Assignment Help Services