搜索此博客

2017年12月15日星期五

Deep learning in R with h2o

library(MASS)
library(h2o)

# Fix the RNG state so the random train/test split later in the script is
# reproducible.
set.seed(123)

# Work on a copy of the Boston data set provided by MASS.
DataFrame <- Boston

# Structure of Boston
str(DataFrame)

# Histogram of the medv column
hist(DataFrame$medv)

# Check the dimensions of this data frame
dim(DataFrame)

head(DataFrame)

# Min and max value of each variable. vapply() iterates over the columns
# directly instead of coercing the whole data frame to a matrix first, and
# fixes the result shape (two numbers per column).
vapply(DataFrame, range, numeric(2))

# Min-max normalisation: centring on the column minimum and dividing by the
# column range maps every variable onto [0, 1]. Note that this is NOT
# standardisation to mean 0 / standard deviation 1, even though scale() is
# the function doing the work.
maxValue <- vapply(DataFrame, max, numeric(1))
minValue <- vapply(DataFrame, min, numeric(1))
DataFrame <- as.data.frame(
  scale(DataFrame, center = minValue, scale = maxValue - minValue)
)

# Bring up the H2O backend on this machine (localhost:54321) and request a
# 3000 MB memory limit for it.
h2o.init(
  ip = "localhost",
  port = 54321,
  max_mem_size = "3000m"
)

# Response column, and the predictors (every remaining column).
y <- "medv"
x <- setdiff(colnames(DataFrame), y)

# Random train/test split: 400 row indices are drawn without replacement for
# training; all remaining rows form the test set. seq_len() is used instead of
# 1:nrow() so a zero-row data frame can never yield the bogus range c(1, 0).
ind <- sample(seq_len(nrow(DataFrame)), 400)
trainDF <- DataFrame[ind, ]
testDF <- DataFrame[-ind, ]

# Fit a feed-forward network on the training rows: two hidden layers of 10
# Tanh units, 10% input dropout and a small L1 penalty. Training runs for up
# to 400 epochs with 3-fold cross-validation; the early-stopping settings
# (stopping_rounds / stopping_metric) monitor MSE.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
model <- h2o.deeplearning(
  x = x,
  y = y,
  training_frame = as.h2o(trainDF),
  seed = 1234,
  nfolds = 3,
  # Network shape and regularisation
  hidden = c(10, 10),
  activation = "Tanh",
  input_dropout_ratio = 0.1,
  l1 = 6e-4,
  # Objective
  loss = "Automatic",
  distribution = "AUTO",
  # Training length and early stopping
  epochs = 400,
  stopping_rounds = 7,
  stopping_metric = "MSE",
  overwrite_with_best_model = TRUE
)

# Plot method for the fitted H2O model object.
plot(model)

# Score the held-out rows and bring the predictions back as a plain data frame.
predictions <- as.data.frame(predict(model, as.h2o(testDF)))

str(predictions)

# Test-set mean squared error. Both vectors are on the min-max scaled medv
# scale (the data frame was rescaled before the split), so this is not in the
# original units of medv.
mean((predictions$predict - testDF$medv)^2)

# Actual vs predicted values; points on the black y = x line are predicted
# exactly.
plot(
  testDF$medv, predictions$predict,
  col = "blue", main = "Real vs Predicted",
  pch = 1, cex = 0.9, type = "p",
  xlab = "Actual", ylab = "Predicted"
)
abline(0, 1, col = "black")

# Shut the H2O instance down without asking for confirmation. FALSE is spelled
# out because F is an ordinary variable that can be reassigned.
h2o.shutdown(prompt = FALSE)

RStudio 实用快捷键

 %in%    Ctrl+Shift+Alt+K
 %>%     Ctrl+Shift+M
<-           Alt+-

2017年12月4日星期一

R 中文正则表达式

library(stringr)
# Sample input and a pattern that accepts only strings made up entirely of
# characters in the range U+4E00..U+9FA5 (one or more, anchored at both ends).
word <- "在线正则表达式测试"
pattern <- '^[\u4E00-\u9FA5]+$'

# grep(): indices of the elements of `word` that match the pattern (pass
# value = TRUE to get the matching elements themselves instead).
grep(pattern, word)

# stringr::str_detect(): does each element contain a match for the pattern?
str_detect(word, pattern)

# grepl(): base-R equivalent that returns only TRUE/FALSE per element.
grepl(pattern, word)

#regexpr(),gregexpr()和regexec()函数同样也可用来进行字符串搜索