# Create the example vectors ----
a <- c(1, 2, 3, 4, 5, 6, 7)
b <- c(4, 3, 4, 4, 6, 7, 8)
# NOTE: the name `c` shadows base::c(). Calls of the form c(...) still resolve
# to the function; the name is kept because later code uses the `c` column.
c <- c("a", "b", "c", "ç", "d", "e", "f")
d <- c("male", "female", "female", "female", "female", "male", "female")
e <- c("white", "white", "red", "red", "blue", "blue", "blue")

# Build a data frame from the vectors ----
# Column names are taken from the vector names: a, b, c, d, e.
df <- data.frame(a, b, c, d, e)

# Inspect the data frame structure ----
str(df)
# ggplot scatter plot ----
# ggplot2 is attached here because ggplot() is used before the library()
# calls that appear further down the script.
library(ggplot2)
ggplot(df, aes(x = a, y = b)) +
  geom_point()

# The most basic barplot you can do (vertical bars) ----
barplot(height = df$b, names.arg = df$c, col = "#69b3a2")

# The same barplot drawn horizontally ----
# las = 1 keeps the axis labels horizontal.
barplot(
  height = df$b,
  names.arg = df$c,
  col = "#69b3a2",
  horiz = TRUE,
  las = 1
)

# ggplot bar chart ----
# stat = "identity" plots the values as they are instead of counting rows;
# coord_flip() then swaps the two axes.
ggplot(df, aes(x = b, y = c)) +
  geom_bar(stat = "identity") +
  coord_flip()
# Reference: https://sparkbyexamples.com/r-programming/

# filter ----
library(dplyr)

# Rows whose column c equals "ç"
filter(df, c == "ç")

# Rows whose column c is one of a set of values
df_filtered <- filter(df, c %in% c("a", "d", "f"))
df_filtered

# filter() by multiple conditions
filter(df, b == 4 & c == "c")

# Select a row by position (second row)
slice(df, 2)
# group_by examples ----
# Group by a single column, then sum column a within each group.
grp_tbl_1 <- df %>% group_by(d)
# Fixed: the original piped from `grp_tbl`, which is never defined.
agg_tbl_2 <- grp_tbl_1 %>% summarise(sum(a))
agg_tbl_2

# Group by multiple columns with multiple aggregations ----
# .groups = "drop" returns an ungrouped result.
agg_tbl <- df %>%
  group_by(d, e) %>%
  summarise(
    total_b = sum(b),
    total_a = sum(a),
    min_b = min(b),
    max_b = max(b),
    .groups = "drop"
  )
agg_tbl
# distinct() usage on all columns ----
df2 <- df %>% distinct()
df2

# Distinct combinations of selected columns
df3 <- df %>% distinct(d, e)
df3

# Distinct on a single column while keeping the other columns
# (.keep_all = TRUE retains the first row found for each value of d)
df4 <- df %>% distinct(d, .keep_all = TRUE)
df4
# dplyr lag() / lead() ----
# lag() shifts values one position later, lead() one position earlier.
lag(b)
lead(b)

# Difference from the previous value of b, computed within each group of e.
# Fixed: the original used an en dash instead of the minus operator.
mutate(df, diff = b - lag(b), .by = e)
library(ggplot2)
library(dplyr)
library(broom)
library(ggpubr)
# Scatter plot of b against a ----
# (The original heading said "histogram", but plot(a, b) draws a scatter plot.)
plot(a, b)

# Correlation ----
cor(a, b)

# Regression and its summary ----
# Fit the model once and reuse the object rather than refitting for every
# print; the duplicated summary() call has been removed.
df_regression <- lm(a ~ b, data = df)
df_regression
summary(df_regression)

# Regression with the lagged value of b as an extra predictor.
# dplyr::lag is named explicitly so the result does not depend on dplyr
# masking stats::lag.
lm(a ~ b + dplyr::lag(b), data = df)

# Diagnostic plots in a 2 x 2 grid; the previous graphics settings are
# restored afterwards instead of being reset to a hard-coded value.
old_par <- par(mfrow = c(2, 2))
plot(df_regression)
par(old_par)
# Regression graph ----
# Reference: https://www.scribbr.com/statistics/linear-regression-in-r/
graph_df <- ggplot(df, aes(a, b)) +
  geom_point()
graph_df

# Add the fitted regression line
graph_df <- graph_df +
  geom_smooth(method = "lm", colour = "blue")
graph_df

# Add the regression equation (stat_regline_equation() comes from ggpubr)
graph_df <- graph_df +
  stat_regline_equation(label.x = 3, label.y = 8)
graph_df

# Add a title and axis labels
# Fixed: the original labels were swapped relative to aes(a, b) and carried
# units left over from another example.
graph_df +
  theme_bw() +
  labs(
    title = "This is the example of Title",
    x = "Value of a",
    y = "Value of b"
  )
# Correlation plots ----
# Reference: https://r-graph-gallery.com/199-correlation-matrix-with-ggally.html
library(GGally)

# Create data: two noisy increasing series, one pure-noise series, and two
# columns derived from var1 (its square and the negated square).
# NOTE: the name `data` shadows utils::data(); calls such as data(flea) still
# resolve to the function.
data <- data.frame(
  var1 = 1:100 + rnorm(100, sd = 20),
  v2 = 1:100 + rnorm(100, sd = 27),
  v3 = rep(1, 100) + rnorm(100, sd = 1)
)
data$v4 <- data$var1^2
data$v5 <- -(data$var1^2)

# Check correlations (as scatterplots), distributions, and print the
# correlation coefficients
ggpairs(data, title = "correlogram with ggpairs()")

# Nice visualization of correlations
ggcorr(data, method = c("everything", "pearson"))
# Correlation by group ----
# Colour the pairs plot of the flea data set by species.
data(flea)
ggpairs(flea, columns = 2:4, ggplot2::aes(colour = species))

# Pairs plot of the tips data set (from the reshape package) with custom
# panel types for the upper and lower triangles.
data(tips, package = "reshape")
ggpairs(
  tips[, c(1, 3, 4, 2)],
  upper = list(continuous = "density", combo = "box_no_facet"),
  lower = list(continuous = "points", combo = "dot_no_facet")
)
