### -----------------------------------------------------------------------
### Programming with Statistical Software (summer semester 2017)
###
### Worked examples for character vectors in base R: quoting, pasting,
### formatting, matching, substrings, regular expressions and splitting.
### The "## ----name----" lines are knitr chunk markers and are kept as-is.
### -----------------------------------------------------------------------

## ----better1a------------------------------------------------------------
# Load the example data set; every column stays character/numeric
# (no automatic conversion to factors). The file must be in the working
# directory.
bli <- read.csv2("better-life-index.csv", stringsAsFactors = FALSE)

## ----better1b------------------------------------------------------------
head(names(bli))

## ----better1c------------------------------------------------------------
## summary(bli)
## str(bli)
## head(bli)

## ----char1---------------------------------------------------------------
# Double quotes are the preferred way to write string literals.
# NOTE: the variable `t` has the same name as base::t(); calls such as t(x)
# still work because R skips non-function objects when resolving a call.
t <- c("The quick brown fox", "jumps over the lazy dog")
t
# Single quotes create exactly the same strings.
t2 <- c('The quick brown fox', 'jumps over the lazy dog')
t2

## ----char2---------------------------------------------------------------
# Use one kind of quote as delimiter to embed the other kind unescaped.
t3 <- c('The quick brown fox', 'jumps over the "lazy" dog')
t3
t4 <- c("The quick brown fox", "jumps over the 'lazy' dog")
t4

## ----char3---------------------------------------------------------------
# ?Quotes
# \n newline
# \t tab

## ----char4---------------------------------------------------------------
# Number of elements in the vector (not the number of characters).
length(t)

## ----char5---------------------------------------------------------------
# Number of characters per element.
nchar(t)

## ----char6---------------------------------------------------------------
# ?paste
paste("Today is", date())
paste("Today is", date(), sep = ": ")

## ----char7---------------------------------------------------------------
paste("A", 1:6, sep = "")
# equivalent
paste0("A", 1:6)

## ----char8---------------------------------------------------------------
# `sep` joins the arguments element-wise; `collapse` then joins the
# resulting elements into a single string.
paste("A", 1:6, collapse = ",", sep = "")
paste(t, collapse = " ")
paste(t, collapse = " ... ")

## ----fun1----------------------------------------------------------------
# ?sprintf
sprintf("%s is %f feet tall", "Sven", 7.1)

## ----fun2----------------------------------------------------------------
# ?tolower
tolower(t)
# ?toupper
toupper(t)

## ----fun3----------------------------------------------------------------
# ?strtrim
# One width per element: 5 for t[1], 10 for t[2].
strtrim(t, c(5, 10))

## ----fun4----------------------------------------------------------------
# ?strwrap
strwrap(t, width = 5)
strwrap(t, width = 10)
strwrap(t, width = 2)

## ----fun5----------------------------------------------------------------
# ?abbreviate
abbreviate(names.arg = t, minlength = 4)
abbreviate(names.arg = t, minlength = 10)
abbreviate(names.arg = t, minlength = 2)

## ----match1--------------------------------------------------------------
# ?match
# match() returns the position of the first exact match, NA otherwise.
match(c("a", "y"), letters)
# c("a", 1) is coerced to character, so "1" is looked up in `letters`.
match(c("a", 1), letters)

## ----match2--------------------------------------------------------------
# %in% is the logical counterpart of match().
c("a", 1) %in% letters

## ----match3--------------------------------------------------------------
# ?pmatch
# Partial matching: no candidate, one candidate, two candidates.
pmatch("med", c("mean", "mode"))
pmatch("med", c("mean", "median", "mode"))
pmatch("med", c("mean", "median", "mode", "median2"))

## ----match4--------------------------------------------------------------
# ?charmatch
# Same three cases as above, evaluated with charmatch() for comparison.
charmatch("med", c("mean", "mode"))
charmatch("med", c("mean", "median", "mode"))
charmatch("med", c("mean", "median", "mode", "median2"))

## ----match5--------------------------------------------------------------
# names(bli)
# Column positions of two variables in the data set.
match(c("Housing_Rooms.per.person", "Environment_Air.pollution"), names(bli))

## ----match6--------------------------------------------------------------
# Same lookup with an additional "_xxx" name, once as positions and once
# as logical flags.
match(
  c(
    "Housing_Rooms.per.person",
    "Housing_Rooms.per.person_xxx",
    "Environment_Air.pollution"
  ),
  names(bli)
)
c(
  "Housing_Rooms.per.person",
  "Housing_Rooms.per.person_xxx",
  "Environment_Air.pollution"
) %in% names(bli)

## ----sub1----------------------------------------------------------------
# ?substr
# substr(x, start, stop)

## ----sub2----------------------------------------------------------------
substr(t, 5, 10)
substr(t, 4, 11)
# Characters 21-23 of t[2] are "dog"; the replacement form edits t in place.
substr(t[2], 21, 23)
substr(t[2], 21, 23) <- "cat"
t

## ----sub3----------------------------------------------------------------
# Replacements never change the string length: a shorter value replaces
# only as many characters as it has, a longer value is truncated.
substr(t[2], 21, 23) <- "zo"
t
substr(t[2], 21, 23) <- "zoxx"
t

# substring(text, first, last = 1000000L)

## ----sub4----------------------------------------------------------------
# substring() defaults `last` to a very large value, i.e. "to the end".
substring(t[2], 21)
substring(t[2], 21) <- "bee"
t
substring(t[2], 21) <- "zoxx"
t

## ----sub5----------------------------------------------------------------
# Drop the last four characters of every income entry.
# NOTE(review): presumably this removes a 4-character unit suffix such as
# " USD" -- confirm against the data file.
substr(
  bli$Income_Households.income,
  start = 1,
  stop = nchar(bli$Income_Households.income) - 4
)

Income_Households.income_num <- as.numeric(
  substr(
    bli$Income_Households.income,
    start = 1,
    stop = nchar(bli$Income_Households.income) - 4
  )
)

## ----regex1--------------------------------------------------------------
# ?regex

## ----regex2--------------------------------------------------------------
# From here on t2 is reused as the example vector for regular expressions.
t2 <- c("Programmieren", "mit", "statistischer", "Software", "SS2017")

# ?grep
# grep(pattern, x, ignore.case = FALSE, perl = FALSE, value = FALSE,
#      fixed = FALSE, useBytes = FALSE, invert = FALSE)

## ----regex3--------------------------------------------------------------
# grep() returns the indices of the matching elements.
grep("a", t2)
grep("[[:alpha:]]", t2)
grep("[[:digit:]]", t2)

## ----regex4--------------------------------------------------------------
# grepl() returns one TRUE/FALSE per element instead.
grepl("a", t2)
grepl("[[:alpha:]]", t2)
grepl("[[:digit:]]", t2)

## ----regex5--------------------------------------------------------------
# Which entries contain a date written as yyyy-mm-dd?
t3 <- c("2012-07-10", "2012-01-20", "May 5, 2012")
grep("\\d{4}-\\d{2}-\\d{2}", t3)
grepl("\\d{4}-\\d{2}-\\d{2}", t3)

## ----regex6--------------------------------------------------------------
# ?regexpr
# regexpr(pattern, text, ignore.case = FALSE, perl = FALSE,
#         fixed = FALSE, useBytes = FALSE)

## ----regex7--------------------------------------------------------------
# regexpr() reports position and length of the first match per element
# (-1 when there is no match).
regexpr("a", t2)

## ----regex8--------------------------------------------------------------
regexpr("[[:alpha:]]", t2)

## ----regex9--------------------------------------------------------------
regexpr("[[:digit:]]", t2)

## ----regex10-------------------------------------------------------------
# gregexpr("a", t2)
# gregexpr("[[:alpha:]]", t2)
# gregexpr("[[:digit:]]", t2)

## ----sub6----------------------------------------------------------------
# sub(pattern, replacement, x, ignore.case = FALSE, perl = FALSE,
#     fixed = FALSE, useBytes = FALSE)
# "\\1", "\\2", "\\3" refer to the parenthesised groups of the pattern.
# sub() replaces the first match per element, gsub() replaces all matches.
sub("(\\d{4})-\\d{2}-\\d{2}", "\\1", t3)
sub("(\\d{4})-(\\d{2})-(\\d{2})", "\\3.\\2.\\1", t3)

gsub("(\\d{4})-\\d{2}-\\d{2}", "\\1", t3)
gsub("(\\d{4})-(\\d{2})-(\\d{2})", "\\3.\\2.\\1", t3)

## ----split1--------------------------------------------------------------
# ?strsplit
# strsplit(x, split, fixed = FALSE, perl = FALSE, useBytes = FALSE)

## ----split2--------------------------------------------------------------
# Split every element at the letter "e"; the result is a list.
x <- c(as = "asfef", qu = "qwerty", "yuiop[", "b", "stuff.blah.yech")
strsplit(x, "e")

## ----split3--------------------------------------------------------------
t4 <- c("89. Derdiyok fuer Schuerrle", "69. Kohr fuer L. Bender")

## ----split4--------------------------------------------------------------
# ?regexec
# Three capture groups: two digits before ". ", the text before " fuer ",
# and the text after it.
m <- regexec("(\\d\\d)\\. (.+) fuer (.+)", t4)

# ?regmatches
# Extract or Replace Matched Substrings
regmatches(t4, m)

## ----daten1--------------------------------------------------------------
# Positions of all columns whose name contains "Income_".
grep("Income_", names(bli))

## ----daten2--------------------------------------------------------------
# Keep only the digits that precede the first " <rest>" part of each entry.
sub("(\\d+) .+", "\\1", bli$Income_Households.income)

## ----daten3--------------------------------------------------------------
# First letter of each country together with the satisfaction value, after
# removing a trailing " rate" from the raw text.
satisfaction <- data.frame(
  letter = substr(bli$COUNTRY, 1, 1),
  value = as.numeric(
    sub("(.+) rate", "\\1", bli$Life.Satisfaction_Life.Satisfaction)
  )
)

# satisfaction
# barplot(sapply(split(satisfaction$value, satisfaction$letter), mean))

## ----daten4--------------------------------------------------------------
# Create objects whose names contain the iteration number (res1, res2, res3).
for (i in 1:3) {
  name <- paste0("res", i)
  assign(name, i)
}

res1
res2
res3