R/truncated_severity.R
fit_truncated_dist.Rd
Estimate the original distribution from truncated data. Truncated data arise frequently in insurance studies. It is common that only claims above a certain threshold are known.
fit_truncated_dist(
y,
dist = c("gamma", "lognormal"),
left = NULL,
right = NULL,
start = NULL,
print_initial = TRUE
)
vector with observations of losses
distribution for severity ("gamma" or "lognormal"). Defaults to "gamma".
numeric. Observations below this threshold are not present in the sample.
numeric. Observations above this threshold are not present in the sample. Defaults to Inf.
list of starting parameters for the algorithm.
logical. If TRUE, print the attempts made to find initial parameters. Defaults to TRUE.
fit_truncated_dist returns an object of class "fitdist".
if (FALSE) {
  library(dplyr)
  library(ggplot2)

  # Simulate the complete (untruncated) severity sample
  set.seed(1)
  losses_all <- rgamma(1000, scale = 148099.5, shape = 0.4887023)

  # Keep only the claims above 30,000 euros (left-truncated sample)
  threshold <- 30000
  losses_truncated <- losses_all[losses_all > threshold]

  # Stack both samples in long format, with one label per observation
  sample_labels <- c("Original data", "Only claims above 30.000 euros")
  severity_df <- data.frame(
    value = c(losses_all, losses_truncated),
    variable = rep(
      sample_labels,
      times = c(length(losses_all), length(losses_truncated))
    )
  )

  # Histogram per sample, restricted to losses below 500,000 euros and
  # expressed in thousands of euros
  severity_df %>%
    filter(value < 5e5) %>%
    mutate(value = value / 1000) %>%
    ggplot(aes(x = value)) +
    geom_histogram(colour = "white") +
    facet_wrap(~variable, ncol = 1) +
    labs(
      y = "Number of observations",
      x = "Severity (x 1000 EUR)"
    )

  # Fit a gamma distribution to the truncated sample.
  # Result: scale = 156259.7 and shape = 0.4588, close to the parameters of
  # the original distribution!
  fit <- fit_truncated_dist(losses_truncated, left = threshold, dist = "gamma")

  # Plot the fitted cdf
  autoplot(fit)

  # Same cdf plot with customised digits, axis labels and y-axis range
  autoplot(
    fit,
    print_dig = 5,
    xlab = "loss",
    ylab = "cdf",
    ylim = c(0.9, 1)
  )

  # Extract the estimated parameters (first element: scale, second: shape)
  est_scale <- fit$estimate[1]
  est_shape <- fit$estimate[2]

  # Draw from the fitted gamma distribution, truncated between 30k and 20 mln
  simulated <- rgammat(
    10,
    scale = est_scale,
    shape = est_shape,
    lower = 3e4,
    upper = 20e6
  )

  # Quantiles of the simulated losses
  quantile(simulated, probs = c(0.5, 0.9, 0.99, 0.995))
}