搜索此博客

2016年3月30日星期三

生存分析surminer包

---
title: "surminer"
output: html_document
---

#生存分析
```{r}
#参考http://www.sthda.com/english/wiki/survminer-r-package-survival-data-analysis-and-visualization
library("survival")
library("survminer")
```


#Draw survival curves without grouping
```{r}
# Fit survival curves
fit <- survfit(Surv(time, status) ~ 1, data = lung)
# Drawing curves
ggsurvplot(fit, color = "#2E9FDF")
```


#Draw survival curves with two groups
```{r}
# Fit survival curves
fit<- survfit(Surv(time, status) ~ sex, data = lung)

# Drawing survival curves
ggsurvplot(fit)
```


#Change font size, style and color
```{r}
# Change font style, size and color
# Change only font size
ggsurvplot(fit, main = "Survival curve",
   font.main = 18,
   font.x =  16,
   font.y = 16,
   font.tickslab = 14)

# Change font size, style and color at the same time
ggsurvplot(fit, main = "Survival curve",
   font.main = c(16, "bold", "darkblue"),
   font.x = c(14, "bold.italic", "red"),
   font.y = c(14, "bold.italic", "darkred"),
   font.tickslab = c(12, "plain", "darkgreen"))

# Change the legend title and labels
ggsurvplot(fit, legend = "bottom",
           legend.title = "Sex",
           legend.labs = c("Male", "Female"))

# Specify legend position by its coordinates
ggsurvplot(fit, legend = c(0.2, 0.2))
```

#Change line types and color palettes
```{r}
# change line size --> 1
# Change line types by groups (i.e. "strata")
# and change color palette
ggsurvplot(fit,  size = 1,  # change line size
           linetype = "strata", # change line type by groups
           break.time.by = 250, # break time axis by 250
           palette = c("#E7B800", "#2E9FDF"), # custom color palette
           conf.int = TRUE, # Add confidence interval
           pval = TRUE # Add p-value
           )

# Use brewer color palette "Dark2"
ggsurvplot(fit, linetype = "strata",
           conf.int = TRUE, pval = TRUE,
           palette = "Dark2")

# Use grey palette
ggsurvplot(fit, linetype = "strata",
           conf.int = TRUE, pval = TRUE,
           palette = "grey")
```

#Add number at risk table
```{r}
# Add risk table
# and change risk table y text colors by strata
ggsurvplot(fit, pval = TRUE, conf.int = TRUE,
           risk.table = TRUE, risk.table.y.text.col = TRUE)

# Customize the output and then print
res <- ggsurvplot(fit, pval = TRUE, conf.int = TRUE,
           risk.table = TRUE)
res$table <- res$table + theme(axis.line = element_blank())
res$plot <- res$plot + labs(title = "Survival Curves")
print(res)

# Change color, linetype by strata, risk.table color by strata
ggsurvplot(fit,
           pval = TRUE, conf.int = TRUE,
           risk.table = TRUE, # Add risk table
           risk.table.col = "strata", # Change risk table color by groups
           linetype = "strata", # Change line type by groups
           ggtheme = theme_bw(), # Change ggplot2 theme
           palette = c("#E7B800", "#2E9FDF"))
```

#Change x axis limits
```{r}
# Change x axis limits (xlim)
#++++++++++++++++++++++++++++++++++++
# One would like to cut axes at a specific time point
ggsurvplot(fit,
           pval = TRUE, conf.int = TRUE,
           risk.table = TRUE, # Add risk table
           risk.table.col = "strata", # Change risk table color by groups
           ggtheme = theme_bw(), # Change ggplot2 theme
           palette = "Dark2",
           xlim = c(0, 600))
```

#Transform survival curves: plot cumulative events and hazard function
```{r}
# Plot cumulative events
ggsurvplot(fit, conf.int = TRUE,
           palette = c("#FF9E29", "#86AA00"),
           risk.table = TRUE, risk.table.col = "strata",
           fun = "event")

# Plot the cumulative hazard function
ggsurvplot(fit, conf.int = TRUE,
           palette = c("#FF9E29", "#86AA00"),
           risk.table = TRUE, risk.table.col = "strata",
           fun = "cumhaz")

# Arbitrary function
ggsurvplot(fit, conf.int = TRUE,
          palette = c("#FF9E29", "#86AA00"),
           risk.table = TRUE, risk.table.col = "strata",
           pval = TRUE,
           fun = function(y) y*100)
```

#Survival curves with multiple groups
```{r}
# Fit (complexe) survival curves
#++++++++++++++++++++++++++++++++++++

fit2 <- survfit( Surv(time, status) ~ rx + adhere,
    data = colon )

# Visualize
#++++++++++++++++++++++++++++++++++++

# Visualize: add p-value, chang y limits
# change color using brewer palette
ggsurvplot(fit2, pval = TRUE,
           break.time.by = 800,
           risk.table = TRUE,
           risk.table.height = 0.5#Useful when you have multiple groups
           )

# Adjust risk table and survival plot heights
# ++++++++++++++++++++++++++++++++++++
# Risk table height
ggsurvplot(fit2, pval = TRUE,
          break.time.by = 800,
          risk.table = TRUE,
          risk.table.col = "strata",
          risk.table.height = 0.5,
          palette = "Dark2")

# Change legend labels
# ++++++++++++++++++++++++++++++++++++

ggsurvplot(fit2, pval = TRUE,
           break.time.by = 800,
           risk.table = TRUE,
           risk.table.col = "strata",
           risk.table.height = 0.5,
           ggtheme = theme_bw(),
           legend.labs = c("A", "B", "C", "D", "E", "F"))
```

2016年3月24日星期四

2010-2015年甘肃AEFI监测数据汇总

library(dplyr)
library(stringr)
aefi <- read.csv("C:\\Users\\liangxf\\Desktop\\页1.csv",header = T,stringsAsFactors = F)
for(i in 1:length(aefi$主键))
{
  if (nchar(aefi$反应分类[i])==0)
  {
    aefi$反应分类[i]=aefi$初步分类[i]
  }
}
for(i in 1:length(aefi$主键))
{
  if (nchar(aefi$最终临床诊断[i])==0)
  {
    aefi$最终临床诊断[i]=aefi$初步临床诊断[i]
  }
}
data <- mutate(aefi,year=substr(aefi$报告卡编号,8,11))%>%
  filter(year>=2010 & 疫苗1.疫苗名称 %in%c("Hib","百白破(无细胞)","脊灰(灭活)","甲肝(灭活)","狂犬病(Vero)","狂犬病(Vero冻干)","流感(裂解)","流脑A+C(结合)","流脑A群","轮状病毒","麻风","腮腺炎","水痘","乙肝(CHO)","乙肝(酵母)","乙脑(减毒)"))%>%
  select(year,疫苗1.疫苗名称,反应分类,最终临床诊断)

aefinew <- read.csv("C:\\Users\\liangxf\\Desktop\\AEFI个案信息.csv",skip = 1,header = T,stringsAsFactors = F)
for(i in 1:length(aefinew$编码))
{
  if (nchar(aefinew$最终临床诊断[i])==0)
  {
    aefinew$最终临床诊断[i]=aefinew$初步临床诊断[i]
  }
}

datanew <- mutate(aefinew,year=substr(aefinew$编码,10,13))%>%
  filter(疫苗1.疫苗名称 %in%c("Hib疫苗","百白破(无细胞)","脊灰疫苗(灭活Salk株)","甲肝(灭活)","狂犬病疫苗(Vero)","狂犬病疫苗(Vero冻干)","流感疫苗(裂解)","流脑A+C疫苗(结合)","A群流脑疫苗","轮状病毒疫苗","麻风疫苗","腮腺炎疫苗","水痘疫苗","乙肝疫苗(CHO)","乙肝疫苗(汉逊酵母)","乙肝疫苗(酿酒酵母)","乙脑疫苗(减毒)"))%>%
  select(year,疫苗1.疫苗名称,反应分类,最终临床诊断)


resul <- rbind(data,datanew)

res <- xtabs(~year+反应分类,data=resul)
write.csv(res,"C:\\Users\\liangxf\\Desktop\\resul.csv")


res <- xtabs(~最终临床诊断+year,data=resul)
write.csv(res,"C:\\Users\\liangxf\\Desktop\\resul.csv")


da <- filter(resul,反应分类=="一般反应")
res <- xtabs(~疫苗1.疫苗名称+最终临床诊断,data=da)
write.csv(res,"C:\\Users\\liangxf\\Desktop\\resul.csv")

da <- filter(resul,反应分类=="异常反应")
res <- xtabs(~疫苗1.疫苗名称+最终临床诊断,data=da)
write.csv(res,"C:\\Users\\liangxf\\Desktop\\resul2.csv")


result <- filter(aefi,反应分类=="一般反应" & 发热=="≥38.6")%>%
  select(反应分类,发热,疫苗1.疫苗名称,疫苗1.生产企业)%>%
  filter(疫苗1.疫苗名称 %in%c("Hib","百白破(无细胞)","脊灰(灭活)","甲肝(灭活)","狂犬病(Vero)","狂犬病(Vero冻干)","流感(裂解)","流脑A+C(结合)","流脑A群","轮状病毒","麻风","腮腺炎","水痘","乙肝(CHO)","乙肝(酵母)","乙脑(减毒)"))
resultnew <- filter(aefinew,反应分类=="一般反应" & 发热.腋温..范围=="≥38.6℃")%>%
  select(反应分类,发热=发热.腋温..范围,疫苗1.疫苗名称,疫苗1.生产企业)%>%
  filter(疫苗1.疫苗名称 %in%c("Hib疫苗","百白破(无细胞)","脊灰疫苗(灭活Salk株)","甲肝(灭活)","狂犬病疫苗(Vero)","狂犬病疫苗(Vero冻干)","流感疫苗(裂解)","流脑A+C疫苗(结合)","A群流脑疫苗","轮状病毒疫苗","麻风疫苗","腮腺炎疫苗","水痘疫苗","乙肝疫苗(CHO)","乙肝疫苗(汉逊酵母)","乙肝疫苗(酿酒酵母)","乙脑疫苗(减毒)"))

fare <- rbind(result,resultnew)
res <- as.data.frame(ftable(xtabs(~疫苗1.疫苗名称+疫苗1.生产企业+反应分类,data=fare)))
fare <- filter(res,Freq>0)
write.csv(fare,"C:\\Users\\liangxf\\Desktop\\fare.csv")


result <- filter(aefi,反应分类=="异常反应" & 最终临床诊断=="过敏性皮疹")%>%
  select(反应分类,最终临床诊断,疫苗1.疫苗名称,疫苗1.生产企业)%>%
  filter(疫苗1.疫苗名称 %in%c("Hib","百白破(无细胞)","脊灰(灭活)","甲肝(灭活)","狂犬病(Vero)","狂犬病(Vero冻干)","流感(裂解)","流脑A+C(结合)","流脑A群","轮状病毒","麻风","腮腺炎","水痘","乙肝(CHO)","乙肝(酵母)","乙脑(减毒)"))
resultnew <- filter(aefinew,反应分类=="异常反应" & 最终临床诊断=="过敏性皮疹")%>%
  select(反应分类,最终临床诊断,疫苗1.疫苗名称,疫苗1.生产企业)%>%
  filter(疫苗1.疫苗名称 %in%c("Hib疫苗","百白破(无细胞)","脊灰疫苗(灭活Salk株)","甲肝(灭活)","狂犬病疫苗(Vero)","狂犬病疫苗(Vero冻干)","流感疫苗(裂解)","流脑A+C疫苗(结合)","A群流脑疫苗","轮状病毒疫苗","麻风疫苗","腮腺炎疫苗","水痘疫苗","乙肝疫苗(CHO)","乙肝疫苗(汉逊酵母)","乙肝疫苗(酿酒酵母)","乙脑疫苗(减毒)"))

pizhen <- rbind(result,resultnew)
res <- as.data.frame(ftable(xtabs(~疫苗1.疫苗名称+疫苗1.生产企业+反应分类,data=pizhen)))
pizhen <- filter(res,Freq>0)
write.csv(pizhen,"C:\\Users\\liangxf\\Desktop\\pizhen.csv")


for (i in 1:length(yichangnew$drugs)) {
  if(length(unlist(strsplit(yichangnew$events[i], "-")))==2){
    yichangnew$events[i] <- as.character(unlist(strsplit(yichangnew$events[i], "-"))[2])
  }
}

2016年3月20日星期日

ubuntu install sublime

sudo add-apt-repository ppa:webupd8team/sublime-text-3
sudo apt-get update
sudo apt-get install sublime-text-installer
License Key
—– BEGIN LICENSE —–
Peter Eriksson
Single User License
EA7E-890068
8E107C71 3100D6FC 2AC805BF 9E627C77
72E710D7 43392469 D06A2F5B F9304FBD
F5AB4DB2 7A95F172 FE68E300 42745819
E94AB2DF C1893094 ECABADC8 71FEE764
20224821 3EABF931 745AF882 87AD0A4B
33C6E377 0210D712 CD2B1178 82601542
C7FD8098 F45D2824 BC7DFB38 F1EBD38A
D7A3AFE0 96F938EA 2D90BD72 9E34CDF0
—— END LICENSE ——

2016年3月14日星期一

cygwin 安装及代理配置

配置代理
 修改C:\cygwin\etc\skel\  C:\cygwin\home\liangxf 和    C:\cygwin\etc\defaults\etc\skel 下的 .bash_profile 文件
 添加 export http_proxy=http://127.0.0.1:1080
 重启后代理生效

安装apt-cyg
wget http://apt-cyg.googlecode.com/svn/trunk/apt-cyg
或者 https://codeload.github.com/transcode-open/apt-cyg/zip/master
chmod +x apt-cyg
mv apt-cyg /usr/local/bin/

安装包
apt-cyg install util-linux bind-utils openssh curl

查找包

apt-cyg find php

编辑/home/用户名/.bashrc。在末尾添加

PS1="\[\e[1;37m\][\u@\h \[\e[1;34m\]\W\[\e[1;37m\]]\[\e[1;36;5m\]\\$\[\e[m\]"
 export LC_ALL="en_US.UTF-8"

 解决apt-cyg安装软件出现的MD5 sum did not match, exiting错误
修改/usr/local/bin目录下的apt-cyg文件,查找MD5 sum,将 exit 1 行注释掉
1.  # check the md5
2.      digest=`cat "desc" | awk '/^install: / { print $4; exit }'`
3.      digactual=`md5sum $file | awk '{print $1}'`
4.      if ! test $digest = $digactual
5.      then
6.        echo MD5 sum did not match, exiting
7.        #exit 1

8.      fi

2016年3月11日星期五

正态分布的极大似然估计

#生成正态分布的随机数,u=0,sigma=1
y <- as.data.frame(rnorm(1000))

#方法一
normal.lik<-function(theta,y){
  mu<-theta[1]
  sigma2<-theta[2]
  n<-nrow(y)
  logl<- -0.5*n*log(2*pi) -0.5*n*log(sigma2) -(1/(2*sigma2))*sum((y-mu)**2)
  return(-logl)
}
optim(c(0,1),normal.lik,y=y)

#方法二
normal.lik2<-function(mu,sigma2){
  n<-nrow(y)
  logl<- -0.5*n*log(2*pi) -0.5*n*log(sigma2) -(1/(2*sigma2))*sum((y-mu)**2)
  return(-logl)
}
library(bbmle)
mle2(normal.lik2, start = list(mu=0,sigma2=1))

2016年3月9日星期三

贝叶斯线性回归

library(LearnBayes)
data(birdextinct)
attach(birdextinct)
birdextinct
logtime=log(time)
fit=lm(logtime ~ nesting+size+status,
         data=birdextinct, x=TRUE, y=TRUE)
summary(fit)
theta.sample <- blinreg(fit$y,fit$x,5000)
par(mfrow=c(2,2))
hist(theta.sample$beta[,2], main="NESTING",
       xlab=expression(beta[1]))
hist(theta.sample$beta[,3], main="SIZE",
       xlab=expression(beta[2]))
hist(theta.sample$beta[,4], main="STATUS",
       xlab=expression(beta[3]))
hist(theta.sample$sigma, main="ERROR SD",
       xlab=expression(sigma))

apply(theta.sample$beta,2, quantile, c(.05,.5,.95))
quantile(theta.sample$sigma,c(.05,.5,.95))

N-N分层贝叶斯模型

y <- c(28, 8, -3, 7, -1, 1, 18, 12)
sd <- c(15, 10, 16, 11, 9, 11, 10, 18)
v <- sd * sd
tau <- c(0:3000)/100
tausq <- tau * tau
ptau.y <- rep(0, 3001)
vmu <- rep(0, 3001)
muhat <- rep(0, 3001)
for (i in (1:3001)) {
  vmu[i] <- 1/sum(1/(tausq[i] + v))
  muhat[i] <- vmu[i] * sum(y/(tausq[i] + v))
  ptau.y[i] <- sqrt(vmu[i] * prod(1/(tausq[i] + v))) * prod(exp(-0.5 *
                                                                  (y - muhat[i]) * (y - muhat[i])/(tausq[i] + v)))
}
plot(tau, ptau.y, type = "l", yaxt = "n", xlab = quote(tau))

eth.tauy <- matrix(rep(0, 24008), 8, 3001)
sdth.tauy <- matrix(rep(0, 24008), 8, 3001)
for (j in (1:8)) {
  for (i in (1:3001)) {
    eth.tauy[j, i] <- (y[j]/v[j] + muhat[i]/tausq[i])/(1/v[j] + 1/tausq[i])
    sdth.tauy[j, i] <- sqrt(((1/tausq[i])/(1/v[j] + 1/tausq[i])) *
                              ((1/tausq[i])/(1/v[j] + 1/tausq[i])) * vmu[i] + 1/(1/v[j] +
                                                                                   1/tausq[i]))
  }
}
par(mfrow = c(1, 2))
taux <- matrix(rep(tau, 8), 8, byrow = T)
matplot(t(taux), t(eth.tauy), ylim = (c(-5, 30)), type = "l", xlab = "tau",
        lty = 1:8, lwd = 1, col = 1, ylab = "Estimate Treatment Effects", main = "Conditional posterior mean")
School <- c("A", "B", "C", "D", "E", "F", "G", "H")
text(x = rep(20, 8), y = t(eth.tauy)[2400, ], School)
matplot(t(taux), t(sdth.tauy), ylim = (c(0, 20)), type = "l", xlab = "tau",
        lty = 1:8, lwd = 1, col = 1, ylab = "Posterior Standard Deviations",
        main = "Conditional posterior SD")
text(x = rep(20, 8), y = t(sdth.tauy)[2400, ], School)

m <- 200
ptau.y <- ptau.y/(sum(ptau.y))
tausamp <- sample(tau, m, replace = T, prob = ptau.y)
tausamp <- sort(tausamp)
tauid <- tausamp * 100 + 1
x <- matrix(rnorm(m * 10, 0, 1), m, 10)
x[, 1] <- tausamp
x[, 2] <- muhat[tauid] + sqrt(vmu[tauid]) * x[, 2]
for (j in (1:8)) {
  thmean <- (y[j] * x[, 1] * x[, 1] + v[j] * x[, 2])/(v[j] + x[, 1] *
                                                        x[, 1])
  thsd <- sqrt(v[j] * x[, 1] * x[, 1]/(v[j] + x[, 1] * x[, 1]))
  x[, j + 2] <- thmean + thsd * x[, j + 2]
}
par(mfrow = c(1, 2))
hist(x[, 2], breaks = c(-40:50), xlab = quote(mu), yaxt = "n", main = "")
hist(x[, 3], breaks = c(-20:60), xlab = "Effect in School A", yaxt = "n",
     main = "")

2016年3月7日星期一

单位编码核对

library(dplyr)
gavi <- read.csv("C:\\Users\\liangxf\\Desktop\\单位信息_主表.csv",skip = 1,header = T,stringsAsFactors = F)%>%
  select(name=单位名称,regionid=所在地区Id,code=单位编码)
provice <- read.csv("C:\\Users\\liangxf\\Desktop\\单位编码.csv",skip = 3,header = T,stringsAsFactors = F)%>%
  select(name=全称,code=国标编码)%>%
  filter(nchar(code)>6)
 
#GAVI系统中有,省系统中无
leftanti <- anti_join(gavi, provice,by=c("code"))
#省系统中有,GAVI系统中无
rightanti <- anti_join(provice,gavi,by=c("code"))

极大似然估计(Fitting a Model by Maximum Likelihood)

#拟合正态分布
产生一个正态分布的数据
```{r}
set.seed(1001)
N <- 100
x <- rnorm(N, mean = 3, sd = 2)
mean(x)
sd(x)
```

定义似然函数
```{r}
LL <- function(mu, sigma) {
  R = dnorm(x, mu, sigma)
  -sum(log(R))
}
```

```{r}
library(stats4)
#由于计算时产生了负值,所以出现警告信息
mle(LL, start = list(mu = 1, sigma=1))
#比如,mean=1,sd=-1,标准差不能为负数
dnorm(x, 1, -1)
```


mle()调用optim(),mle()默认方法BFGS
```{r}
library(stats4)
mle(LL, start = list(mu = 1, sigma=1), method = "L-BFGS-B", lower = c(-Inf, 0),
    upper = c(Inf, Inf))
```

修改似然函数
```{r}
LL <- function(mu, sigma) {
  R = suppressWarnings(dnorm(x, mu, sigma))
  -sum(log(R))
}
mle(LL, start = list(mu = 1, sigma=1))
#初始值选择错误,将对结果又较大影响
mle(LL, start = list(mu = 0, sigma=1))
```

#拟合线性模型
产生数据
```{r}
x <- runif(N)
y <- 5 * x + 3 + rnorm(N)
fit <- lm(y ~ x)
summary(fit)
plot(x, y)
abline(fit, col = "red")
```

由于模型不是概率密度函数(pdf),无法像正态分布那样进行精确的似然函数的定义。由于线性模型要求残差符合正态分布,所以定义残差的似然函数
```{r}
LL <- function(beta0, beta1, mu, sigma) {
    # Find residuals
    R = y - x * beta1 - beta0
    # Calculate the likelihood for the residuals (with mu and sigma as parameters)
    R = suppressWarnings(dnorm(R, mu, sigma))
    # Sum the log likelihoods for all of the data points
    -sum(log(R))
}

LL <- function(beta0, beta1, mu, sigma) {
    R = y - x * beta1 - beta0
    #
    R = suppressWarnings(dnorm(R, mu, sigma, log = TRUE))
    #
    -sum(R)
}
```

```{r}
#同样,初始值很重要,错误的选择将有问题
#fit <- mle(LL, start = list(beta0 = 3, beta1 = 1, mu = 0, sigma=1))
#fit <- mle(LL, start = list(beta0 = 4, beta1 = 2, mu = 0, sigma=1))
fit <- mle(LL, start = list(beta0 = 5, beta1 = 3, mu = 0, sigma=1))
summary(fit)
fit <- mle(LL, start = list(beta0 = 2, beta1 = 1.5, sigma=1), fixed = list(mu = 0),
             nobs = length(y))
summary(fit)
```

模型比较
```{r}
AIC(fit)
BIC(fit)
logLik(fit)
```

mle()函数调用失败的问题在于对求Hessian 矩阵的逆矩阵。stats4包中的mle()方法除了有较好的初始值外,没有更好的解决办法。bbmle包中的mle2()提供了更好的替代方法。
```{r}
library(bbmle)
fit <- mle2(LL, start = list(beta0 = 3, beta1 = 1, mu = 0, sigma = 1))
summary(fit)
```

参考:http://www.r-bloggers.com/fitting-a-model-by-maximum-likelihood/

函数优化

R语言中对函数求极值有两种命令,较简单的是optimize函数,在确定搜索范围后可对一元函数求极值,例如:
f = function(x) {3*x^4-2*x^3+3*x^2-4*x+5}
optimize(f, lower=-20, upper=20)
#optimize(f,c(-20,20),maximum=F)
其中$minimum为自变量x的取值,$objective为函数值

#似然函数
f <- function(p) {
  p^7*(1-p)^3
}

#对数似然函数
ll <-function(p) {
  7*log(p)+3*log(1-p)
}

optimize(f, lower=0, upper=1,maximum = T)
optimize(ll, lower=0, upper=1,maximum = T)

optim函数中第一项为起始点,第二项为目标函数,后面的是所需数据变量,optim命令的默认方法是Nelder-Mead算法,它是求多维函数极值的一种算法,由Nelder和Mead提出,又叫单纯形算法,但和线性规划中的单纯形算法是不同的,由于未利用任何求导运算,算法比较简单,但收敛速度较慢,适合变量数不是很多的方程求极值
normal <- function(theta,x){
  mu <- theta[1]
  sigma2 <- theta[2]
  n <- length(x)
  logL <- -0.5*n*log(2*pi)-0.5*n*log(sigma2)-(1/(2*sigma2))*sum((x-mu)**2)
  return (-logL)
}

x <- rnorm(1000)
optim(c(0,1),normal,x=x)
optim(c(0,1),normal,x=x,method="BFGS")

多参数贝叶斯估计

logit<-function(x){
  y=log(x/(1-x))
  return(y)
}

bioassay<-data.frame(
x <- c(-0.86, -0.30, -0.05, 0.73),
n <- c(5, 5, 5, 5),
y <- c(0.01, 1, 3, 4.99),
r <- logit(y/n))
plot(x,r)
lm.bioassay<-lm(formula = r~x)
abline(lm.bioassay)
summary(lm.bioassay)

bioassay.post <- function(alpha = 0.1, beta = 5) {
  k <- 4
  x <- c(-0.86, -0.3, -0.05, 0.73)
  n <- c(5, 5, 5, 5)
  y <- c(0, 1, 3, 5)
  #prod连乘
  prod <- 1
  prod <- prod((exp(alpha + beta * x)/(1 + exp(alpha + beta * x)))^y *
                 (1/(1 + exp(alpha + beta * x)))^(n - y))
  return(prod)
}

mlpost <- function(alpha = 0.1, beta = 5) {
  -log(bioassay.post(alpha, beta))
}
library(bbmle)
mle(mlpost)


modedensity <- bioassay.post(0.87, 7.91)
alphax <- seq(-5, 10, length = 1000)
betay <- seq(-10, 40, length = 1000)
#Vectorize,能将一个不能进行向量化运算的函数进行转化,使之具备向量化运算功能。
post <- outer(alphax, betay, Vectorize(bioassay.post))
par(mfrow = c(1, 2))
contour(alphax, betay, post, levels = seq(0.05, 0.95, length = 10) * modedensity,
        xlim = c(-5, 10), ylim = c(-10, 40), xlab = quote(alpha), ylab = quote(beta),
        drawlabels = FALSE)

post <- post/sum(post)
posta <- apply(post, MARGIN = 1, FUN = sum)
w <- posta/sum(posta)
n <- 1000
ra <- rep(0, n)
rb <- rep(0, n)
for (j in 1:n) {
  ra[j] <- sample(alphax, 1, replace = T, prob = w)
  postb <- bioassay.post(ra[j], betay)
  wb <- postb/sum(postb)
  rb[j] <- sample(betay, 1, replace = T, prob = w)
}
plot(ra, rb, xlim = c(-5, 10), ylim = c(-10, 40), xlab = quote(alpha),
     ylab = quote(beta))

ld50 <- -ra/rb
hist(ld50,freq=FALSE,breaks=1000,xlim=c(-0.8,0.5),
       axes = TRUE, xlab="LD50",main="")

2016年3月6日星期日

贝叶斯估计

bioassay.post <- function(alpha = 0.1, beta = 5) {
  k <- 4
  x <- c(-0.86, -0.3, -0.05, 0.73)
  n <- c(5, 5, 5, 5)
  y <- c(0, 1, 3, 5)
  prod <- 1
  prod <- prod((exp(alpha + beta * x)/(1 + exp(alpha + beta * x)))^y *
                 (1/(1 + exp(alpha + beta * x)))^(n - y))
  return(prod)
}

mlpost <- function(alpha = 0.1, beta = 5) {
  -log(bioassay.post(alpha, beta))
}
library(bbmle)
mle(mlpost)

2016年3月5日星期六

R bayes

library(dplyr)
library(tidyr)
library(ggplot2)

x <- seq(0, 1, 0.01)
n <- 5
y <- 3
alpha <- c(0.5, 0.5, 0.5, 1, 1, 1, 1.5, 1.5, 1.5)
beta <- c(0.5, 1, 1.5, 0.5, 1, 1.5, 0.5, 1, 1.5)
z.post <- as.data.frame(matrix(0, nrow = length(x), ncol = length(alpha)))
z.prio <- as.data.frame(matrix(0, nrow = length(x), ncol = length(alpha)))
for (i in c(1:9)) {
  z.post[, i] <- dbeta(x, alpha[i] + y, n - y + beta[i])
  z.prio[, i] <- dbeta(x, alpha[i], beta[i])
}

z.post <- select(z.post, post.V1 = V1, post.V2 = V2, post.V3 = V3, post.V4 = V4,
                 post.V5 = V5, post.V6 =V6, post.V7 = V7, post.V8 = V8, post.V9 = V9)
z.prio <- select(z.prio, prio.V1 = V1, prio.V2 = V2, prio.V3 = V3,
                  prio.V4 = V4, prio.V5 = V5, prio.V6 = V6, prio.V7 = V7, prio.V8 = V8,
                  prio.V9 = V9)

z.wide <- cbind(x,z.post,z.prio)
z <- gather(z.wide,bayes,value,post.V1:prio.V9)%>%mutate(group=substr(bayes,7,8))%>%mutate(co=substr(bayes,1,4))

ggplot(z)+
  geom_line(aes(x,value,colour = factor(co)))+
  facet_wrap(~ group)

2016年3月3日星期四

《R语言与统计分析》 11章代码

#例题 11.3.1

library(dplyr)
library(reshape2)
library(ggplot2)

x <- seq(0, 1, 0.01)
n <- 5
y <- 3
alpha <- c(0.5, 0.5, 0.5, 1, 1, 1, 1.5, 1.5, 1.5)
beta <- c(0.5, 1, 1.5, 0.5, 1, 1.5, 0.5, 1, 1.5)
z.post <- as.data.frame(matrix(0, nrow = length(x), ncol = length(alpha)))
z.prior <- as.data.frame(matrix(0, nrow = length(x), ncol = length(alpha)))
for (i in c(1:9)) {
  z.post[, i] <- dbeta(x, alpha[i] + y, n - y + beta[i])
  z.prior[, i] <- dbeta(x, alpha[i], beta[i])
}

z.post <- select(z.post, post.V1 = V1, post.V2 = V2, post.V3 = V3, post.V4 = V4,
                 post.V5 = V5, post.V6 =V6, post.V7 = V7, post.V8 = V8, post.V9 = V9)
z.prior <- select(z.prior, prior.V1 = V1, prior.V2 = V2, prior.V3 = V3,
                  prior.V4 = V4, prior.V5 = V5, prior.V6 = V6, prior.V7 = V7, prior.V8 = V8,
                  prior.V9 = V9)

z.wide <- cbind(x,z.post,z.prior)
z.long1 <- melt(z.wide,id.vars=("x"),measure.vars = c("post.V1","prior.V1"),variable.name = "bayes",value.name = "value")%>%mutate(group="V1")
z.long2 <- melt(z.wide,id.vars=("x"),measure.vars = c("post.V2","prior.V2"),variable.name = "bayes",value.name = "value")%>%mutate(group="V2")
z.long3 <- melt(z.wide,id.vars=("x"),measure.vars = c("post.V3","prior.V3"),variable.name = "bayes",value.name = "value")%>%mutate(group="V3")
z.long4 <- melt(z.wide,id.vars=("x"),measure.vars = c("post.V4","prior.V4"),variable.name = "bayes",value.name = "value")%>%mutate(group="V4")
z.long5 <- melt(z.wide,id.vars=("x"),measure.vars = c("post.V5","prior.V5"),variable.name = "bayes",value.name = "value")%>%mutate(group="V5")
z.long6 <- melt(z.wide,id.vars=("x"),measure.vars = c("post.V6","prior.V6"),variable.name = "bayes",value.name = "value")%>%mutate(group="V6")
z.long7 <- melt(z.wide,id.vars=("x"),measure.vars = c("post.V7","prior.V7"),variable.name = "bayes",value.name = "value")%>%mutate(group="V7")
z.long8 <- melt(z.wide,id.vars=("x"),measure.vars = c("post.V8","prior.V8"),variable.name = "bayes",value.name = "value")%>%mutate(group="V8")
z.long9 <- melt(z.wide,id.vars=("x"),measure.vars = c("post.V9","prior.V9"),variable.name = "bayes",value.name = "value")%>%mutate(group="V9")

z <- rbind(z.long1,z.long2,z.long3,z.long4,z.long5,z.long6,z.long7,z.long8,z.long9)


ggplot(z)+
  geom_line(aes(x,value,colour = factor(bayes)))+
  facet_wrap(~ group)
  #facet_grid(. ~ group)

ggplot(z.wide, aes(x)) +
  geom_line(aes(y = post.V1, colour = "post"), size = 0.5) +
  geom_line(aes(y = prior.V1, colour = "prior"), size = 0.5)

postmean<-(alpha[1]+y)/(alpha[1]+y+n-y+beta[1])


#例题 11.3.2
x=seq(0,1,0.01)
n=5
y <- c(0,1,2,3,4,5)
alpha <- 1
beta <- 1
z <- as.data.frame(matrix(0,nrow = length(x),ncol=length(y)))
for(i in c(1:6)){
  z[,i] <- dbeta(x,alpha+y[i],n-y[i]+beta)
}

matplot(x, z, ylim=c(0,4), xlab="x", ylab="",
          col=1:6, type="l", lwd=2)
text(0.20,3,"y=0")
text(0.25,2.5,"y=1")
text(0.4,2.2,"y=2")
text(0.55,2.2,"y=3")
text(0.75,2.5,"y=4")
text(0.95,3,"y=5")
text(0.08,1.1,"Prior")