### -----------------------------------------------------------------------
### Programming with statistical software (summer semester 2017)
### -----------------------------------------------------------------------

## ----hsaur1--------------------------------------------------------------
# install.packages("HSAUR2")
library(HSAUR2)

## ----hsaur2--------------------------------------------------------------
# List the data sets shipped with the package.
data(package = "HSAUR2")

## ----hsaur3--------------------------------------------------------------
# Load the Forbes2000 data set into the workspace and confirm it is there.
data("Forbes2000", package = "HSAUR2")
ls()

## ----hsaur44-------------------------------------------------------------
# Two equivalent ways of opening the help page of the data set.
?Forbes2000
help("Forbes2000", package = "HSAUR2")

## ----hsaur5--------------------------------------------------------------
str(Forbes2000)

## ----hsaur6--------------------------------------------------------------
str(Forbes2000, vec.len = 1)  # abbreviated excerpt

## ----hsaur7--------------------------------------------------------------
print(Forbes2000)  # all rows

## ----hsaur8a-------------------------------------------------------------
head(Forbes2000)  # the first 6 rows

## ----hsaur8b-------------------------------------------------------------
tail(Forbes2000)  # the last 6 rows

## ----hsaur9--------------------------------------------------------------
head(Forbes2000, n = 2)   # print the first two rows
tail(Forbes2000, n = 10)  # print the last 10 rows

## ----hsaur10-------------------------------------------------------------
nrow(Forbes2000)  # number of rows
ncol(Forbes2000)  # number of columns
dim(Forbes2000)   # number of rows and columns
# dim(Forbes2000)[1]
# dim(Forbes2000)[2]

## ----hsaur11-------------------------------------------------------------
names(Forbes2000)

## ----sum1----------------------------------------------------------------
summary(Forbes2000)

## ----sum2----------------------------------------------------------------
mean(Forbes2000$sales)
median(Forbes2000$sales)
range(Forbes2000$sales)
quantile(Forbes2000$sales)
quantile(Forbes2000$sales, probs = seq(0, 1, by = 0.05))

## ----sum3----------------------------------------------------------------
# Absolute and relative frequencies of a factor, plus its levels.
table(Forbes2000$category)
prop.table(table(Forbes2000$category))
nlevels(Forbes2000$category)
levels(Forbes2000$category)

## ----sub1----------------------------------------------------------------
# Two ways of selecting the first three rows: keep 1:3, or drop the rest.
Forbes2000[1:3, ]
Forbes2000[-(4:nrow(Forbes2000)), ]

## ----sub2----------------------------------------------------------------
Forbes2000[, 2]       # by index
Forbes2000$name       # by name
Forbes2000[, "name"]  # by name
Forbes2000[, c("name", "country")]
Forbes2000[, -c(3:ncol(Forbes2000))]  # by index
# Forbes2000[, -"name"]  # does not work
# Negative selection by name has to go through the column positions.
Forbes2000[, -which(names(Forbes2000) == "name")]
Forbes2000[, -which(names(Forbes2000) %in% c("name", "country"))]

## ----sub3----------------------------------------------------------------
vars <- c("name", "sales", "profits", "assets")
Forbes2000[1:3, vars]

## ----sub4a---------------------------------------------------------------
Forbes2000$assets > 1000

## ----sub4b---------------------------------------------------------------
table(Forbes2000$assets > 1000)

## ----sub5----------------------------------------------------------------
Forbes2000[Forbes2000$assets > 1000, "name"]
# with which(): access via the index
Forbes2000[which(Forbes2000$assets > 1000), "name"]
Forbes2000[Forbes2000$country == "Australia" & Forbes2000$rank < 100, ]  # AND operator

## ----sub5b---------------------------------------------------------------
Forbes2000[Forbes2000$country == "Australia" | Forbes2000$rank < 100, ]  # OR operator

## ----sub6----------------------------------------------------------------
?subset
# The condition is evaluated inside the data frame, so column names can be
# used directly.
subset(x = Forbes2000, subset = assets > 1000, select = vars)

## ----miss1---------------------------------------------------------------
summary(Forbes2000$profits)

## ----miss2---------------------------------------------------------------
# mean() returns NA when the variable contains missing values.
mean(Forbes2000$profits)
?mean
mean(Forbes2000$profits, na.rm = TRUE)
# cor() has no na.rm argument; missing values are handled through `use`.
cor(Forbes2000$profits, Forbes2000$sales)
?cor
cor(Forbes2000$profits, Forbes2000$sales, use = "pairwise.complete.obs")

## ----miss3a--------------------------------------------------------------
which(is.na(Forbes2000$profits))
# which(is.na(Forbes2000))  # not that helpful
which(is.na(Forbes2000), arr.ind = TRUE)

## ----miss3b--------------------------------------------------------------
# The index of the valid (non-missing) values of a variable is obtained with
which(!is.na(Forbes2000$profits))

## ----miss4---------------------------------------------------------------
complete.cases(Forbes2000$profits)  # complete entries
complete.cases(Forbes2000)          # complete observations (rows)

## ----miss5---------------------------------------------------------------
?na.omit
# Keep only the rows without any missing value.
Forbes2000cc <- na.omit(Forbes2000)
dim(Forbes2000cc)
mean(Forbes2000cc$profits)

## ----manip1--------------------------------------------------------------
# Add a new column.
Forbes2000cc$costs <- Forbes2000cc$sales - Forbes2000cc$profits

## ----manip2--------------------------------------------------------------
# Assigning NULL removes a column.
Forbes2000cc$category <- NULL
dim(Forbes2000cc)

## ----manip3--------------------------------------------------------------
# Replace columns 4 to 7 by their scale()d versions.
Forbes2000cc[, 4:7] <- scale(Forbes2000cc[, 4:7])

## ----manip4--------------------------------------------------------------
# median() does not accept a data frame: the call below stops with
# "need numeric data" and would abort a sourced script, so it stays
# commented out.
# median(Forbes2000cc[, 4:7])
c(median(Forbes2000cc[, "sales"]), median(Forbes2000cc[, "profits"]))

## ----manip5--------------------------------------------------------------
# Column-wise medians without spelling out every column.
?apply
apply(X = Forbes2000cc[, 4:7], MARGIN = 2, FUN = median)
?sapply
sapply(X = Forbes2000cc[, 4:7], FUN = median)

## ----manip6--------------------------------------------------------------
Forbes2000cc[722, ]

## ----manip7--------------------------------------------------------------
# Overwrite a single cell and show the modified row.
Forbes2000cc[722, "sales"] <- 0.05
Forbes2000cc[722, ]

## ----save1---------------------------------------------------------------
?saveRDS
# Write the object to a file in the current working directory, then remove
# it from the workspace.
saveRDS(Forbes2000cc, file = "Forbes2000cc.rds")
rm(Forbes2000cc)

## ----save2---------------------------------------------------------------
Forbes2000cc <- readRDS("Forbes2000cc.rds")

## ----save3---------------------------------------------------------------
?write.table  # for saving data sets as a text file
?save         # for saving one or several objects
?save.image   # for saving the entire workspace
# setwd("...")
save.image(file = "workspace.RData")