搜索此博客

2016年5月30日星期一

升级所有pip包

conda update --all
pip freeze --local | grep -v '^\-e' | cut -d = -f 1 | xargs sudo pip install -U

R 与矩阵

library(Matrix)
# Demo matrix A: 3 rows x 6 columns holding 1..18 (filled column by column).
A <- matrix(seq_len(18), nrow = 3, ncol = 6)
# Rank of A
rankMatrix(A)
# Singular value decomposition of A
svd(A)
# Eigen-decomposition (square matrix): B is 3 x 3 holding 1..9.
B <- matrix(seq_len(9), nrow = 3)
eigen(B)

library(numDeriv)
# Scalar test function: the mean over i of i * (exp(x[i]) - x[i]),
# where i runs over the positions of x.
#
# x: numeric (or complex) vector.
# Returns a single value.
sc2.f <- function(x) {
  position <- seq_along(x)
  term <- exp(x) - x
  sum(position * term) / length(x)
}

# Element-wise vector function: i * (exp(x[i]) - 1) / n for each position i,
# where n is the length of x.
#
# x: numeric (or complex) vector.
# Returns a vector of the same length as x.
sc2.g <- function(x) {
  count <- length(x)
  growth <- exp(x) - 1
  seq_len(count) * growth / count
}

# Evaluate at a random point of length 5.
x0 <- rnorm(5)

# Hessian of sc2.f with the default method and with the "complex" method,
# compared at machine-epsilon tolerance.
hess <- hessian(func = sc2.f, x = x0)
hessc <- hessian(func = sc2.f, x = x0, method = "complex")
all.equal(hess, hessc, tolerance = .Machine$double.eps)

# Hessian = Jacobian of the gradient: differentiate sc2.g with both methods
# and compare each Jacobian with the matching Hessian.
jac <- jacobian(func = sc2.g, x = x0)
jacc <- jacobian(func = sc2.g, x = x0, method = "complex")
all.equal(hess, jac, tolerance = .Machine$double.eps)
all.equal(hessc, jacc, tolerance = .Machine$double.eps)

data.table的使用(与dplyr包比较)

library(dplyr)
library(data.table)
library(lubridate)
library(jsonlite)
library(tidyr)
library(ggplot2)
library(compare)

# Download the dataset as JSON and list its top-level elements.
spending <- fromJSON(
  "https://data.medicare.gov/api/views/nrth-mfg3/rows.json?accessType=DOWNLOAD"
)
names(spending)

# Build a data frame from the `data` element and name its columns after the
# column names stored under meta$view$columns, made syntactically valid.
meta <- spending$meta
hospital_spending <- data.frame(spending$data)
names(hospital_spending) <- make.names(meta$view$columns$name)

# Drop the leading columns, from `sid` through `meta`.
hospital_spending <- hospital_spending %>% select(-c(sid:meta))

# dplyr: get a glimpse of the data
glimpse(hospital_spending)

# Columns 6-11 are converted to numeric, going through character first.
cols <- 6:11
hospital_spending[cols] <- lapply(
  hospital_spending[cols],
  function(column) as.numeric(as.character(column))
)

# Columns 12-13 are converted to POSIXct date-times.
cols <- 12:13
hospital_spending[cols] <- lapply(hospital_spending[cols], as.POSIXct)

# Check that every column now has the intended class.
sapply(hospital_spending, class)

# Create a data.table from the data frame.
hospital_spending_DT <- data.table(hospital_spending)
class(hospital_spending_DT)

# Select one variable
from_dplyr <- hospital_spending %>% select(Hospital.Name)
from_data_table <- hospital_spending_DT[, list(Hospital.Name)]

# Drop one variable
from_dplyr <- hospital_spending %>% select(-Hospital.Name)
from_data_table <- hospital_spending_DT[, !"Hospital.Name", with = FALSE]

# Drop by reference on a copy, then confirm the column is gone.
DT <- copy(hospital_spending_DT)
DT[, Hospital.Name := NULL]
"Hospital.Name" %in% names(DT)

# Select several variables
from_dplyr <- hospital_spending %>%
  select(Hospital.Name, State, Measure.Start.Date, Measure.End.Date)
from_data_table <- hospital_spending_DT[
  ,
  list(Hospital.Name, State, Measure.Start.Date, Measure.End.Date)
]

# Drop several variables
from_dplyr <- hospital_spending %>%
  select(-c(Hospital.Name, State, Measure.Start.Date, Measure.End.Date))
from_data_table <- hospital_spending_DT[
  ,
  !c("Hospital.Name", "State", "Measure.Start.Date", "Measure.End.Date"),
  with = FALSE
]

# Drop by reference on a copy, then confirm the columns are gone.
DT <- copy(hospital_spending_DT)
DT[, c("Hospital.Name", "State", "Measure.Start.Date", "Measure.End.Date") := NULL]
c("Hospital.Name", "State", "Measure.Start.Date", "Measure.End.Date") %in% names(DT)

# Select every column whose name contains "Date"
from_dplyr <- hospital_spending %>% select(contains("Date"))
from_data_table <- subset(
  hospital_spending_DT,
  select = grep("Date", names(hospital_spending_DT))
)

# Rename columns. setnames() renames the data.table in place; dplyr's rename()
# returns a renamed data frame that is assigned back.
setnames(
  hospital_spending_DT,
  c("Hospital.Name", "Measure.Start.Date", "Measure.End.Date"),
  c("Hospital", "Start_Date", "End_Date")
)
hospital_spending <- rename(
  hospital_spending,
  Hospital = Hospital.Name,
  Start_Date = Measure.Start.Date,
  End_Date = Measure.End.Date
)

# Filter on a single value (rows for California)
from_dplyr <- filter(hospital_spending, State == "CA")
from_data_table <- hospital_spending_DT[State == "CA"]

# Filter on several values
from_dplyr <- filter(hospital_spending, State %in% c("CA", "MA", "TX"))
from_data_table <- hospital_spending_DT[State %in% c("CA", "MA", "TX")]

# Sorting. setorder() reorders its argument by reference, so it is applied to
# a copy(): hospital_spending_DT keeps its row order and from_data_table is an
# independent sorted table, mirroring what arrange() does for the data frame.

# Ascending
from_dplyr <- arrange(hospital_spending, State)
from_data_table <- setorder(copy(hospital_spending_DT), State)

# Descending
from_dplyr <- arrange(hospital_spending, desc(State))
from_data_table <- setorder(copy(hospital_spending_DT), -State)

# Several keys: State ascending, End_Date descending
from_dplyr <- arrange(hospital_spending, State, desc(End_Date))
from_data_table <- setorder(copy(hospital_spending_DT), State, -End_Date)

# Add or update a column: state average minus national average.
from_dplyr <- hospital_spending %>%
  mutate(diff = Avg.Spending.Per.Episode..State. - Avg.Spending.Per.Episode..Nation.)
# := adds the column by reference, so it is applied to a copy.
from_data_table <- copy(hospital_spending_DT)[
  ,
  diff := Avg.Spending.Per.Episode..State. - Avg.Spending.Per.Episode..Nation.
]

# Summaries over the whole table: a single statistic ...
hospital_spending %>%
  summarize(mean = mean(Avg.Spending.Per.Episode..Nation.))
hospital_spending_DT[, list(mean = mean(Avg.Spending.Per.Episode..Nation.))]
# ... and several statistics at once.
hospital_spending %>%
  summarize(
    mean = mean(Avg.Spending.Per.Episode..Nation.),
    maximum = max(Avg.Spending.Per.Episode..Nation.),
    minimum = min(Avg.Spending.Per.Episode..Nation.),
    median = median(Avg.Spending.Per.Episode..Nation.)
  )
hospital_spending_DT[
  ,
  list(
    mean = mean(Avg.Spending.Per.Episode..Nation.),
    maximum = max(Avg.Spending.Per.Episode..Nation.),
    minimum = min(Avg.Spending.Per.Episode..Nation.),
    median = median(Avg.Spending.Per.Episode..Nation.)
  )
]

# Per-hospital mean: first few rows of the data.table result.
head(
  hospital_spending_DT[
    ,
    list(mean = mean(Avg.Spending.Per.Episode..Hospital.)),
    by = list(Hospital)
  ]
)

# Grouped summary by hospital, in both packages.
mygroup <- hospital_spending %>% group_by(Hospital)
from_dplyr <- mygroup %>%
  summarize(mean = mean(Avg.Spending.Per.Episode..Hospital.))
from_data_table <- hospital_spending_DT[
  ,
  list(mean = mean(Avg.Spending.Per.Episode..Hospital.)),
  by = list(Hospital)
]

# Grouped summary by hospital and state (dplyr, step by step).
mygroup <- hospital_spending %>% group_by(Hospital, State)
from_dplyr <- mygroup %>%
  summarize(mean = mean(Avg.Spending.Per.Episode..Hospital.))

# Chaining
# With data.table, operations can be chained with %>% or with successive [ calls.
from_dplyr <- summarize(
  group_by(hospital_spending, Hospital, State),
  mean = mean(Avg.Spending.Per.Episode..Hospital.)
)
from_data_table <- hospital_spending_DT[
  ,
  list(mean = mean(Avg.Spending.Per.Episode..Hospital.)),
  by = list(Hospital, State)
]


2016年5月24日星期二

anaconda 安装

1、下载anaconda(https://www.continuum.io/downloads)
2、 bash Anaconda2-4.0.0-Linux-x86_64.sh
3、 pip install 包名,或者conda install 包名,查看 pip list 或者conda list

2016年5月23日星期一

GPU Deep Learn

一、安装驱动 sudo apt-get install nvidia-352 nvidia-settings
GPU支持 (https://developer.nvidia.com/cuda-gpus)
二、安装CUDA和Digits
CUDA_REPO_PKG=cuda-repo-ubuntu1404_7.5-18_amd64.deb && wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/$CUDA_REPO_PKG && sudo dpkg -i $CUDA_REPO_PKG
ML_REPO_PKG=nvidia-machine-learning-repo_4.0-2_amd64.deb && wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/$ML_REPO_PKG && sudo dpkg -i $ML_REPO_PKG
sudo apt-get update
sudo apt-get install digits
三、打开您的浏览器并访问http://localhost

2016年5月22日星期日

ubuntu install maltego

sudo add-apt-repository ppa:darklordpaunik8880/darkminttrustytahr

sudo apt-get update

sudo apt-get install maltego

编译OpenWrt shadowsocks

1、sudo apt-get install gcc g++ binutils patch bzip2 flex bison make autoconf gettext texinfo unzip sharutils subversion libncurses5-dev ncurses-term zlib1g-dev gawk git libssl-dev ccache

2、sudo vim /etc/profile
#将sdk下staging_dir/host/bin和toolchain/bin目录添加
 export PATH=${PATH}:/home/xuefliang/Downloads/OpenWrt-SDK-15.05.1-ar71xx-generic_gcc-4.8-linaro_uClibc-0.9.33.2.Linux-x86_64/staging_dir/toolchain-mips_34kc_gcc-4.8-linaro_uClibc-0.9.33.2/bin:/home/xuefliang/Downloads/OpenWrt-SDK-15.05.1-ar71xx-generic_gcc-4.8-linaro_uClibc-0.9.33.2.Linux-x86_64/staging_dir/host/bin


source /etc/profile

make menuconfig

在Network里面选中shadowsocks-libev,按空格键将其设为编译成模块(即前面显示M)。然后直接退出并保存设置,开始编译。

3、make package/openwrt-shadowsocks/compile V=s

2016年5月13日星期五

Spark安装

1 jps
2 sudo apt-get install scala
3 下载spark
4 tar xvf 解压

SparkR 安装

# Install the two prerequisite packages.
install.packages(c("rJava", "devtools"))

# Install SparkR from the "pkg" subdirectory of its GitHub repository.
library(devtools)
install_github(repo = "amplab-extras/SparkR-pkg", subdir = "pkg")

jupyter安装

安装
1、sudo pip install pyzmq
2、sudo pip install tornado
3、sudo pip install jinja2
4、sudo pip install jsonschema
5、sudo pip install jupyter

or
sudo pip install "ipython[notebook]"


运行:jupyter  notebook
出现jupyter,在其右侧,选择“new”,选择“Python2”

2016年5月6日星期五

ubuntu 配置 dante-server

1、sudo apt-get install dante-server
2、sudo vi /etc/danted.conf


# $Id: sockd.conf,v 1.43 2005/12/26 16:35:26 michaels Exp $
#
# A sample danted.conf
#
#
# The configfile is divided into three parts;
#    1) serversettings
#    2) rules
#    3) routes
#
# The recommended order is:
#   Serversettings:
#               logoutput
#               internal
#               external
#               method
#               clientmethod
#               users
#               compatibility
#               extension
#               connecttimeout
#               iotimeout
# srchost
#
#  Rules:
# client block/pass
# from to
# libwrap
# log
#
#     block/pass
# from to
# method
# command
# libwrap
# log
# protocol
# proxyprotocol
#
#  Routes:

# the server will log both via syslog, to stdout and to /var/log/lotsoflogs
#logoutput: syslog stdout /var/log/lotsoflogs
logoutput: stderr

# The server will bind to the address 10.1.1.1, port 1080 and will only
# accept connections going to that address.
#internal: 10.1.1.1 port = 1080
# Alternatively, the interface name can be used instead of the address.
internal: eth0 port = 10080  #端口号

# all outgoing connections from the server will use the IP address
# 195.168.1.1
external: eth0  # 网卡接口

# list over acceptable methods, order of preference.
# A method not set here will never be selected.
#
# If the method field is not set in a rule, the global
# method is filled in for that rule.
#

# methods for socks-rules.
method: username none # 接受用户名认证或无认证

# methods for client-rules.
clientmethod: none  # 不使用身份认证

#or if you want to allow rfc931 (ident) too
#method: username rfc931 none

#or for PAM authentification
#method: pam

#
# An important section, pay attention.
#

# when doing something that can require privilege, it will use the
# userid:
user.privileged: proxy

# when running as usual, it will use the unprivileged userid of:
user.notprivileged: nobody

# If you compiled with libwrap support, what userid should it use
# when executing your libwrap commands?  "libwrap".
user.libwrap: nobody


#
# some options to help clients with compatibility:
#

# when a client connection comes in the socksserver will try to use
# the same port as the client is using, when the socksserver
# goes out on the clients behalf (external: IP address).
# If this option is set, Dante will try to do it for reserved ports aswell.
# This will usually require user.privileged to be set to "root".
#compatibility: sameport

# If you are using the bind extension and have trouble running servers
# via the server, you might try setting this.  The consequences of it
# are unknown.
#compatibility: reuseaddr

#
# The Dante server supports some extensions to the socks protocol.
# These require that the socks client implements the same extension and
# can be enabled using the "extension" keyword.
#
# enable the bind extension.
#extension: bind


#
#
# misc options.
#

# how many seconds can pass from when a client connects til it has
# sent us it's request?  Adjust according to your network performance
# and methods supported.
#connecttimeout: 30   # on a lan, this should be enough if method is "none".

# how many seconds can the client and it's peer idle without sending
# any data before we dump it?  Unless you disable tcp keep-alive for
# some reason, it's probably best to set this to 0, which is
# "forever".
#iotimeout: 0 # or perhaps 86400, for a day.

# do you want to accept connections from addresses without
# dns info?  what about addresses having a mismatch in dnsinfo?
#srchost: nounknown nomismatch

#
# The actual rules.  There are two kinds and they work at different levels.
#
# The rules prefixed with "client" are checked first and say who is allowed
# and who is not allowed to speak/connect to the server.  I.e the
# ip range containing possibly valid clients.
# It is especially important that these only use IP addresses, not hostnames,
# for security reasons.
#
# The rules that do not have a "client" prefix are checked later, when the
# client has sent its request and are used to evaluate the actual
# request.
#
# The "to:" in the "client" context gives the address the connection
# is accepted on, i.e the address the socksserver is listening on, or
# just "0.0.0.0/0" for any address the server is listening on.
#
# The "to:" in the non-"client" context gives the destination of the clients
# socksrequest.
#
# "from:" is the source address in both contexts.
#


# the "client" rules.  All our clients come from the net 10.0.0.0/8.
#

# Allow our clients, also provides an example of the port range command.
client pass {
from: 0.0.0.0/0 port 1-65535 to: 0.0.0.0/0  #允许的地址
# match all idented users that also are in passwordfile
}

# This is identical to above, but allows clients without a rfc931 (ident)
# too.  In practise this means the socksserver will try to get a rfc931
# reply first (the above rule), if that fails, it tries this rule.
#client pass {
# from: 0.0.0.0/8 port 1-65535 to: 0.0.0.0/0
#}


# drop everyone else as soon as we can and log the connect, they are not
# on our net and have no business connecting to us.  This is the default
# but if you give the rule yourself, you can specify details.
#client block {
# from: 0.0.0.0/0 to: 0.0.0.0/0
# log: connect error
#}


# the rules controlling what clients are allowed what requests
#

# you probably don't want people connecting to loopback addresses,
# who knows what could happen then.
#block {
# from: 0.0.0.0/0 to: 127.0.0.0/8
# log: connect error
#}

# the people at the 172.16.0.0/12 are bad, no one should talk to them.
# log the connect request and also provide an example on how to
# interact with libwrap.
#block {
# from: 0.0.0.0/0 to: 172.16.0.0/12
# libwrap: spawn finger @%a
# log: connect error
#}

# unless you need it, you could block any bind requests.
#block {
# from: 0.0.0.0/0 to: 0.0.0.0/0
# command: bind
# log: connect error
#}

# or you might want to allow it, for instance "active" ftp uses it.
# Note that a "bindreply" command must also be allowed, it
# should usually by from "0.0.0.0/0", i.e if a client of yours
# has permission to bind, it will also have permission to accept
# the reply from anywhere.
pass {
from: 0.0.0.0/0 to: 0.0.0.0/0
protocol:tcp  udp #tcp和udp协议
}

# some connections expect some sort of "reply", this might be
# the reply to a bind request or it may be the reply to a
# udppacket, since udp is packetbased.
# Note that nothing is done to verify that it's a "genuine" reply,
# that is in general not possible anyway.  The below will allow
# all "replies" in to your clients at the 10.0.0.0/8 net.
#pass {
# from: 0.0.0.0/0 to: 10.0.0.0/8
# command: bindreply udpreply
# log: connect error
#}


# pass any http connects to the example.com domain if they
# authenticate with username.
# This matches "example.com" itself and everything ending in ".example.com".
#pass {
# from: 10.0.0.0/8 to: .example.com port = http
# log: connect error
# method: username
#}




# block any other http connects to the example.com domain.
#block {
# from: 0.0.0.0/0 to: .example.com port = http
# log: connect error
#}

# everyone from our internal network, 10.0.0.0/8 is allowed to use
# tcp and udp for everything else.
#pass {
# from: 10.0.0.0/8 to: 0.0.0.0/0
# protocol: tcp udp
#}

# last line, block everyone else.  This is the default but if you provide
# one  yourself you can specify your own logging/actions
#block {
# from: 0.0.0.0/0 to: 0.0.0.0/0
# log: connect error
#}

# route all http connects via an upstream socks server, aka "server-chaining".
#route {
# from: 10.0.0.0/8 to: 0.0.0.0/0 port = http via: socks.example.net port = socks
#}

3、启动
sudo /etc/init.d/danted start
4、查看监听是否成功
  netstat -anp | grep 10080

Ubuntu IP地址配置

sudo vi /etc/network/interfaces

# 方式一:DHCP自动获取(与方式二只能保留其一)
auto eth0
iface eth0 inet dhcp

# 方式二:静态IP
auto eth0
iface eth0 inet static
address 10.112.6.208
gateway 10.112.6.254
netmask 255.255.255.0

R 二类疫苗统计

library(dplyr)
# Read the input CSV.
df <- read.csv("C:\\Users\\xuefliang\\Desktop\\统计\\乙肝汉逊.csv")

# year, diqu and pihao are handled as text.
df[c("year", "diqu", "pihao")] <- lapply(
  df[c("year", "diqu", "pihao")],
  as.character
)

# goujin and shiyong are converted to numbers, going through character first.
df[c("goujin", "shiyong")] <- lapply(
  df[c("goujin", "shiyong")],
  function(column) as.numeric(as.character(column))
)
#sum(df$goujin,na.rm=T)
#sum(df$shiyong,na.rm=T)

# Fill blank cells downward: every NA or "" entry takes the value of the
# entry above it (which may itself have just been filled).
#
# values: character vector.
# Returns a character vector of the same length. A blank first entry is left
# as "" because there is nothing above it to copy from.
fill_blank_down <- function(values) {
  values[is.na(values)] <- ""
  # Start at the second element: index i - 1 does not exist for i == 1, and
  # seq_along(values)[-1] is empty for vectors of length 0 or 1.
  for (i in seq_along(values)[-1]) {
    if (values[i] == "") {
      values[i] <- values[i - 1]
    }
  }
  values
}

df$year <- fill_blank_down(df$year)
df$diqu <- fill_blank_down(df$diqu)

# Split on "-": keep only the part of each pihao value before the first "-".
# Vectorised so that a data frame with zero rows is handled without touching
# a non-existent first row. pieces[1] is NA when a value yields no pieces,
# which is what unlist(strsplit(...))[1] gives element by element.
df$pihao <- vapply(
  strsplit(df$pihao, "-", fixed = TRUE),
  function(pieces) pieces[1],
  character(1)
)

# Step 1: total goujin and shiyong per diqu / year / pihao (NAs ignored).
# Step 2: tag each of those rows with number = 1, then per diqu / year add up
#         the tags (pihaosum) and the two totals.
result <- df %>%
  group_by(diqu, year, pihao) %>%
  summarise(
    tgoujin = sum(goujin, na.rm = TRUE),
    tshiyong = sum(shiyong, na.rm = TRUE)
  ) %>%
  mutate(number = 1) %>%
  group_by(diqu, year) %>%
  summarise(
    pihaosum = sum(number),
    tgoujin = sum(tgoujin, na.rm = TRUE),
    tshiyong = sum(tshiyong, na.rm = TRUE)
  )

# Write the summary table to CSV.
write.csv(result, file = "C:\\Users\\xuefliang\\Desktop\\统计\\结果\\乙肝汉逊.csv")

2016年5月3日星期二

R 删除空行

library(xlsx)
library(dplyr)
# Path of the Excel workbook to read.
workbook <- "C:\\Users\\Administrator\\Desktop\\合格\\兰州市汇总2010-2016年15种生物制品使用情况信息表().xlsx"
# Sheet 1, reading from row 3 onward.
mydataframe <- read.xlsx2(workbook, sheetIndex = 1, startRow = 3)

# X..6, X. and X..5 are handled as text.
mydataframe[c("X..6", "X.", "X..5")] <- lapply(
  mydataframe[c("X..6", "X.", "X..5")],
  as.character
)

# The two quantity columns are converted to numbers, going through character.
mydataframe[["购进数量..支."]] <-
  as.numeric(as.character(mydataframe[["购进数量..支."]]))
mydataframe[["该批次生物制品使用数量..支."]] <-
  as.numeric(as.character(mydataframe[["该批次生物制品使用数量..支."]]))
# Remove blank rows: a row counts as blank when column X..5 is the empty
# string. A logical mask is used rather than negative row numbers, because a
# negative index built from an empty set of row numbers (no blank rows found)
# does not keep every row.
is_blank_row <- mydataframe$X..5 %in% ""
mydataframe <- mydataframe[!is_blank_row, , drop = FALSE]

# Fill empty cells of column X. downward: an empty string takes the value of
# the row above it (which may itself have just been filled).
# The loop starts at row 2 because row 1 has no row above it; an empty first
# cell is left as is. NA cells are skipped rather than tested.
filled <- mydataframe$X.
for (i in seq_along(filled)[-1]) {
  if (!is.na(filled[i]) && nchar(filled[i]) == 0) {
    filled[i] <- filled[i - 1]
  }
}
mydataframe$X. <- filled

# Split on "-": keep only the part of each X..6 value before the first "-".
# Vectorised so that a data frame with zero rows is handled without touching
# a non-existent first row. pieces[1] is NA when a value yields no pieces,
# which is what unlist(strsplit(...))[1] gives element by element.
mydataframe$X..6 <- vapply(
  strsplit(mydataframe$X..6, "-", fixed = TRUE),
  function(pieces) pieces[1],
  character(1)
)

# Keep the four columns of interest under short names, then total the two
# quantity columns per year / pihao.
result <- mydataframe %>%
  select(
    year = X.,
    pihao = X..6,
    goujin = 购进数量..支.,
    shiyong = 该批次生物制品使用数量..支.
  ) %>%
  group_by(year, pihao) %>%
  summarise(tgoujin = sum(goujin), tshiyong = sum(shiyong))

# Write the summary table to CSV.
write.csv(result, file = "C:\\Users\\Administrator\\Desktop\\合格\\兰州\\冻干人用狂犬病疫苗(Vero细胞).csv")

2016年5月2日星期一