搜索此博客

2016年9月17日星期六

vim 自动补全括号

sudo vim /usr/share/vim/vimrc

### sudo find / -name '*vimrc'

" Insert-mode expression helper for an opening bracket.
" Returns the keys to be typed: when the cursor is past the end of the line
" or sits on a space, the open/close pair followed by <ESC>i (leaving the
" cursor between the two characters); otherwise only the opening character.
"   a:open  - opening character
"   a:close - matching closing character
function! AutoPair(open, close)
        let l:text = getline('.')
        let l:idx = col('.') - 1
        " A non-space character is under the cursor: do not auto-close.
        if l:idx < strlen(l:text) && l:text[l:idx] != ' '
                return a:open
        endif
        return a:open . a:close . "\<ESC>i"
endfunction

" Insert-mode expression helper for a closing bracket.
" If the character under the cursor already equals a:char, returns <Right>
" so the cursor steps over it; otherwise returns a:char to be inserted.
function! ClosePair(char)
        let l:under_cursor = getline('.')[col('.') - 1]
        return l:under_cursor == a:char ? "\<Right>" : a:char
endfunction

" Insert-mode expression helper for characters that open and close with the
" same symbol (quotes, backticks).
"   - cursor past end of line or on a space: insert the character twice and
"     use <ESC>i to place the cursor between the two
"   - cursor already on a:char: return <Right> to step over it
"   - anything else: insert a single a:char
function! SamePair(char)
        let l:text = getline('.')
        let l:idx = col('.') - 1

        if l:idx >= strlen(l:text) || l:text[l:idx] == ' '
                return a:char . a:char . "\<ESC>i"
        endif
        if l:text[l:idx] == a:char
                return "\<Right>"
        endif
        return a:char
endfunction

" Backspace helper, called from normal mode by the insert-mode <BS> mapping.
" When the character under the cursor is an opening bracket with a matching
" partner, the whole bracketed span is deleted; in every other case a plain
" backspace is performed.
function! RemovePairs()
    let l:line = getline(".")
    " col(".") is 1-based, so index col(".")-1 is the character under the
    " cursor (the one just typed before the mapping left insert mode).
    let l:previous_char = l:line[col(".")-1]

    if index(["(", "[", "{"], l:previous_char) != -1
        let l:original_pos = getpos(".")
        " Jump to the matching bracket. Plain `normal` (no !) is used, so any
        " user mapping of % is honoured.
        execute "normal %"
        let l:new_pos = getpos(".")

        " Cursor did not move: no matching bracket was found, so fall back to
        " an ordinary backspace over the character under the cursor.
        if l:original_pos == l:new_pos
            execute "normal! a\<BS>"
            return
        end

        " The cursor is now on the matching bracket. v%x selects back to the
        " opening bracket and deletes the selection. The trailing a / i
        " differs depending on whether the matching bracket was the last
        " character of its line.
        " NOTE(review): presumably this compensates for the cursor position
        " after deleting at end of line - confirm.
        let l:line2 = getline(".")
        if len(l:line2) == col(".")
            execute "normal! v%xa"
        else
            execute "normal! v%xi"
        end

    else
        " Not an opening bracket: ordinary backspace.
        execute "normal! a\<BS>"
    end
endfunction

" Normal-mode helper taking a single character.
" If the character right after the cursor equals a:char, the cursor moves
" one column to the right; otherwise `i` followed by a:char is executed.
function! RemoveNextDoubleChar(char)
    " String indices are 0-based, so col(".") addresses the byte that follows
    " the cursor position.
    let l:following = getline(".")[col(".")]

    if l:following == a:char
        normal! l
    else
        execute "normal! i" . a:char . ""
    endif
endfunction

" Insert-mode mappings for bracket and quote auto-pairing.
" Every key is mapped exactly once. The closing brackets ) ] } used to be
" mapped twice; the first definitions (<ESC>:call RemoveNextDoubleChar(...))
" were replaced by the ClosePair() definitions that followed them and never
" took effect, so only the effective definitions are kept.

" Backspace: leave insert mode, let RemovePairs() delete, then resume typing.
inoremap <BS> <ESC>:call RemovePairs()<CR>a

" Brackets: the opening key may insert the pair, the closing key steps over
" an already present closing character.
inoremap ( <c-r>=AutoPair('(', ')')<CR>
inoremap ) <c-r>=ClosePair(')')<CR>
inoremap { <c-r>=AutoPair('{', '}')<CR>
inoremap } <c-r>=ClosePair('}')<CR>
inoremap [ <c-r>=AutoPair('[', ']')<CR>
inoremap ] <c-r>=ClosePair(']')<CR>

" Quotes and backticks: opening and closing character are identical.
inoremap " <c-r>=SamePair('"')<CR>
inoremap ' <c-r>=SamePair("'")<CR>
inoremap ` <c-r>=SamePair('`')<CR>

2016年9月9日星期五

R等比例抽样

library(caret)
# Stratified sampling: draw 10% of the rows, stratified by Species.
splitindex <- createDataPartition(
  iris$Species,
  times = 1,
  p = 0.1,
  list = FALSE
)
sample <- iris[splitindex, ]

# Correlation matrix of the four numeric iris columns.
cor(iris[, 1:4])

# Dummy-variable encoding of Species: build the encoder, then apply it to
# iris and store the result as a data frame.
dmy <- dummyVars(~ Species, data = iris)
result <- data.frame(predict(dmy, newdata = iris))

# Rattle: load the package and call rattle(), which starts its graphical
# interface.
library(rattle)
rattle()

ubuntu 安装 flash 插件

sudo apt install flashplugin-installer

2016年9月6日星期二

forcats包简介

forcats

安装

# Install forcats from its GitHub repository via devtools.
# Uncomment the next line first if devtools itself is not installed yet.
# install.packages("devtools")
devtools::install_github("hadley/forcats")
## Skipping install of 'forcats' from a github remote, the SHA1 (004279de) has not changed since last install.
##   Use `force = TRUE` to force installation

使用

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(forcats)
# Preview the first rows of gss_cat, the survey data set shipped with forcats.
head(gss_cat)
## # A tibble: 6 × 9
##    year       marital   age   race        rincome            partyid
##   <int>        <fctr> <int> <fctr>         <fctr>             <fctr>
## 1  2000 Never married    26  White  $8000 to 9999       Ind,near rep
## 2  2000      Divorced    48  White  $8000 to 9999 Not str republican
## 3  2000       Widowed    67  White Not applicable        Independent
## 4  2000 Never married    39  White Not applicable       Ind,near rep
## 5  2000      Divorced    25  White Not applicable   Not str democrat
## 6  2000       Married    25  White $20000 - 24999    Strong democrat
## # ... with 3 more variables: relig <fctr>, denom <fctr>, tvhours <int>

改变因子的值(Change level values)

# Tabulate the number of rows per partyid level.
count(gss_cat, partyid)
## # A tibble: 10 × 2
##               partyid     n
##                <fctr> <int>
## 1           No answer   154
## 2          Don't know     1
## 3         Other party   393
## 4   Strong republican  2314
## 5  Not str republican  3032
## 6        Ind,near rep  1791
## 7         Independent  4119
## 8        Ind,near dem  2499
## 9    Not str democrat  3690
## 10    Strong democrat  3490
# Rename six partyid levels (new label = old label); levels that are not
# listed keep their original label. Then tabulate the recoded factor.
gss_cat %>%
  mutate(
    partyid = fct_recode(
      partyid,
      "Republican, strong" = "Strong republican",
      "Republican, weak" = "Not str republican",
      "Independent, near rep" = "Ind,near rep",
      "Independent, near dem" = "Ind,near dem",
      "Democrat, weak" = "Not str democrat",
      "Democrat, strong" = "Strong democrat"
    )
  ) %>%
  count(partyid)
## # A tibble: 10 × 2
##                  partyid     n
##                   <fctr> <int>
## 1              No answer   154
## 2             Don't know     1
## 3            Other party   393
## 4     Republican, strong  2314
## 5       Republican, weak  3032
## 6  Independent, near rep  1791
## 7            Independent  4119
## 8  Independent, near dem  2499
## 9         Democrat, weak  3690
## 10      Democrat, strong  3490
# Collapse the ten partyid levels into four groups (new level = vector of
# old levels), then tabulate the collapsed factor.
gss_cat %>%
  mutate(
    partyid = fct_collapse(
      partyid,
      other = c("No answer", "Don't know", "Other party"),
      rep = c("Strong republican", "Not str republican"),
      ind = c("Ind,near rep", "Independent", "Ind,near dem"),
      dem = c("Not str democrat", "Strong democrat")
    )
  ) %>%
  count(partyid)
## # A tibble: 4 × 2
##   partyid     n
##    <fctr> <int>
## 1   other   548
## 2     rep  5346
## 3     ind  8409
## 4     dem  7180

合并因子,保留最多的那n个因子

# Tabulate the number of rows per relig level.
count(gss_cat, relig)
## # A tibble: 15 × 2
##                      relig     n
##                     <fctr> <int>
## 1                No answer    93
## 2               Don't know    15
## 3  Inter-nondenominational   109
## 4          Native american    23
## 5                Christian   689
## 6       Orthodox-christian    95
## 7             Moslem/islam   104
## 8            Other eastern    32
## 9                 Hinduism    71
## 10                Buddhism   147
## 11                   Other   224
## 12                    None  3523
## 13                  Jewish   388
## 14                Catholic  5124
## 15              Protestant 10846
# With neither `n` nor `prop` supplied, fct_lump() lumps the least frequent
# levels together while keeping "Other" the smallest level; for relig this
# leaves only Protestant as a separate level.
gss_cat %>%
  mutate(
    relig = fct_lump(relig)
  ) %>%
  count(relig)
## # A tibble: 2 × 2
##        relig     n
##       <fctr> <int>
## 1 Protestant 10846
## 2      Other 10637
# Keep the five most frequent relig levels; everything else goes to "Other".
gss_cat %>%
  mutate(
    relig = fct_lump(relig, n = 5)
  ) %>%
  count(relig)
## # A tibble: 6 × 2
##        relig     n
##       <fctr> <int>
## 1  Christian   689
## 2       None  3523
## 3     Jewish   388
## 4   Catholic  5124
## 5 Protestant 10846
## 6      Other   913
# `prop` is a proportion of the rows. A negative value reverses the
# direction: prop = -0.10 lumps the levels that make up more than 10% of the
# rows (the most common ones) into "Other" and keeps the rare ones.
gss_cat %>%
  mutate(
    relig = fct_lump(relig, prop = -0.10)
  ) %>%
  count(relig)
## # A tibble: 12 × 2
##                      relig     n
##                     <fctr> <int>
## 1                No answer    93
## 2               Don't know    15
## 3  Inter-nondenominational   109
## 4          Native american    23
## 5                Christian   689
## 6       Orthodox-christian    95
## 7             Moslem/islam   104
## 8            Other eastern    32
## 9                 Hinduism    71
## 10                Buddhism   147
## 11                  Jewish   388
## 12                   Other 19717

Change order of levels:

fct_relevel(): move specified level up front.

fct_inorder(): order by first appearance of each level.

fct_reorder(): order by summary of another value (same as stats::reorder()).

fct_infreq(): order by frequency.

fct_shuffle(): randomly shuffle order of levels.

fct_rev(): reverse order of levels.

fct_shift(): shift levels to the left/right.

Change value of levels:

fct_anon(): anonymise factor levels.

fct_lump(): lump rarest (or most common) levels into “other”.

fct_recode(): manually recode levels.

Add new levels:

fct_expand(): add new levels to a factor.

fct_explicit_na(): turn missing values into an explicit factor level.

A few other helpers:

fct_c(): concatenate factors using union of levels.

fct_count(): count occurrences of levels, optionally sorting by frequency.

fct_unify(): ensure list of factors share the same levels.

fct_unique(): compute the unique values of a factor from its levels.

fct_drop(): drop levels without data (same as base::droplevels()).

lvls_union(): finds union of levels from list of factors.

# One row per relig level: mean age and mean TV hours (NAs ignored) plus the
# number of rows in the group.
relig <- summarise(
  group_by(gss_cat, relig),
  age = mean(age, na.rm = TRUE),
  tvhours = mean(tvhours, na.rm = TRUE),
  n = n()
)
# Mean TV hours per religion, with the levels in their stored order.
ggplot(relig, aes(x = tvhours, y = relig)) +
  geom_point()

# Same plot with the religion levels reordered by tvhours.
ggplot(relig, aes(x = tvhours, y = fct_reorder(relig, tvhours))) +
  geom_point()

# Share of each marital status within every age (rows with missing age are
# dropped first).
# The proportion must be computed per age, so the data is counted by
# (age, marital) and then grouped by age only. Grouping by both variables
# before count() would, on current dplyr where count() keeps the input
# grouping, make sum(n) equal to n and yield prop == 1 on every row.
by_age <- gss_cat %>%
  filter(!is.na(age)) %>%
  count(age, marital) %>%
  group_by(age) %>%
  mutate(prop = n / sum(n)) %>%
  ungroup()

# Proportion by age, one coloured line per marital status.
ggplot(by_age, aes(x = age, y = prop)) +
  geom_line(mapping = aes(colour = marital))

# Same lines, with the colour levels reordered by fct_reorder2() (by prop at
# the largest age); the legend title is reset to "marital".
ggplot(by_age, aes(x = age, y = prop)) +
  geom_line(mapping = aes(colour = fct_reorder2(marital, age, prop))) +
  labs(colour = "marital")