Skip to contents

Gambling neighbourhoods using HDBSCAN

This vignette demonstrates how to create interactive maps with the leaflet package in R to visualize gambling neighbourhoods. We load the geographical data containing the coordinates of gambling premises, together with the HDBSCAN clustering results for several values of MinPts (2, 3, 4, 5 and 10). We then create a separate map for each value of MinPts, showing the cluster centers and the number of gambling premises in each cluster. Clusters are drawn in blue as polygons, each with a popup displaying the cluster ID and the number of gambling premises within that cluster. Each polygon is sized according to the number of gambling premises it contains. Gambling premises inside a cluster are also shown in blue, while unclustered (noise) points are shown in black. Each point (a gambling premise, GP) has a popup displaying its ID.

Load data

Load the geographical data (spatial points dataframe) containing the coordinates of gambling premises.

# Read the premises' coordinates; `xy` is passed to create_df() for every
# MinPts value below.
xy <- readRDS("data/xy.rds")

MinPts=2

Load dataframe with coordinates and cluster assignments

# Read the stored HDBSCAN result for MinPts = 2.
res2 <- readRDS(file = "results/hdbscan_results_2.rds")
# Combine the cluster assignments with the premises' coordinates.
df2 <- create_df(res2, xy)
head(df2)
#> # A tibble: 6 × 5
#>    LONG   LAT cluster    ID   Pop
#>   <dbl> <dbl> <fct>   <int> <dbl>
#> 1 -2.10  57.1 13       7246    24
#> 2 -2.10  57.1 13       7929    24
#> 3 -2.10  57.1 13       4660    24
#> 4 -2.11  57.1 13       6787    24
#> 5 -2.11  57.1 13       5866    24
#> 6 -2.11  57.1 13       4658    24
dim(df2)
#> [1] 8761    5

Number of clusters (excluding noise):

# Count the distinct cluster labels, excluding the noise label explicitly.
# Subtracting 1 from the number of unique labels undercounts by one whenever
# a run produces no noise points at all.
# NOTE(review): assumes noise is labelled "0" (the dbscan::hdbscan convention,
# consistent with the `0` cluster printed for MinPts = 4) -- confirm against
# create_df().
k2 <- length(setdiff(unique(as.character(df2$cluster)), "0"))
k2
#> [1] 13

Number of noise points:

# Extract the unclustered (noise) premises from df2; the result is reused
# when drawing the map.
df2_null <- create_df_null(df2)
# One row per noise point, so the row count is the number of noise points.
n_noise_k2 <- nrow(df2_null)
n_noise_k2
#> [1] 11

Leaflet map for MinPts=2

# Outline every cluster with its convex hull.
cluster_hulls2 <- get_cluster_hulls(df2)
# Build the interactive map from the hulls, the premises and the noise points.
map2 <- leaflet_map(cluster_hulls2, df2, df2_null)
# Optional export to a standalone HTML file:
# saveWidget(map2, file="results/map_MinPt2.html")
map2

MinPts=3

Load dataframe with coordinates and cluster assignments

# Read the stored HDBSCAN result for MinPts = 3.
res3 <- readRDS(file = "results/hdbscan_results_3.rds")
# Combine the cluster assignments with the premises' coordinates.
df3 <- create_df(res3, xy)
head(df3)
#> # A tibble: 6 × 5
#>    LONG   LAT cluster    ID   Pop
#>   <dbl> <dbl> <fct>   <int> <dbl>
#> 1 -2.10  57.1 117      7246    56
#> 2 -2.10  57.1 117      7929    56
#> 3 -2.10  57.1 117      4660    56
#> 4 -2.11  57.1 117      6787    56
#> 5 -2.11  57.1 117      5866    56
#> 6 -2.11  57.1 117      4658    56
dim(df3)
#> [1] 8761    5

Number of clusters (excluding noise):

#> [1] 117

Number of noise points:

#> [1] 125

Leaflet map for MinPts=3

MinPts=4

Load dataframe with coordinates and cluster assignments

# Read the stored HDBSCAN result for MinPts = 4.
res4 <- readRDS(file = "results/hdbscan_results_4.rds")
# Combine the cluster assignments with the premises' coordinates.
df4 <- create_df(res4, xy)
head(df4)
#> # A tibble: 6 × 5
#>    LONG   LAT cluster    ID   Pop
#>   <dbl> <dbl> <fct>   <int> <dbl>
#> 1 -2.10  57.1 0        7246     0
#> 2 -2.10  57.1 131      7929     4
#> 3 -2.10  57.1 131      4660     4
#> 4 -2.11  57.1 130      6787     9
#> 5 -2.11  57.1 130      5866     9
#> 6 -2.11  57.1 130      4658     9
dim(df4)
#> [1] 8761    5

Number of clusters (excluding noise):

#> [1] 131

Number of noise points:

#> [1] 260

Leaflet map for MinPts=4

MinPts=5

Load dataframe with coordinates and cluster assignments

# Read the stored HDBSCAN result for MinPts = 5.
res5 <- readRDS(file = "results/hdbscan_results_5.rds")
# Combine the cluster assignments with the premises' coordinates.
df5 <- create_df(res5, xy)
head(df5)
#> # A tibble: 6 × 5
#>    LONG   LAT cluster    ID   Pop
#>   <dbl> <dbl> <fct>   <int> <dbl>
#> 1 -2.10  57.1 512      7246    56
#> 2 -2.10  57.1 512      7929    56
#> 3 -2.10  57.1 512      4660    56
#> 4 -2.11  57.1 512      6787    56
#> 5 -2.11  57.1 512      5866    56
#> 6 -2.11  57.1 512      4658    56
dim(df5)
#> [1] 8761    5

Number of clusters (excluding noise):

#> [1] 512

Number of noise points:

#> [1] 1808

Leaflet map for MinPts=5

MinPts=10

Load dataframe with coordinates and cluster assignments

# Read the stored HDBSCAN result for MinPts = 10.
res10 <- readRDS(file = "results/hdbscan_results_10.rds")
# Combine the cluster assignments with the premises' coordinates.
df10 <- create_df(res10, xy)
head(df10)
#> # A tibble: 6 × 5
#>    LONG   LAT cluster    ID   Pop
#>   <dbl> <dbl> <fct>   <int> <dbl>
#> 1 -2.10  57.1 245      7246    56
#> 2 -2.10  57.1 245      7929    56
#> 3 -2.10  57.1 245      4660    56
#> 4 -2.11  57.1 245      6787    56
#> 5 -2.11  57.1 245      5866    56
#> 6 -2.11  57.1 245      4658    56
dim(df10)
#> [1] 8761    5

Number of clusters (excluding noise):

#> [1] 245

Number of noise points:

#> [1] 3119

Leaflet map for MinPts=10


Summary statistics


| MinPts Parameter | Number of Clusters | Unclustered Points | Largest Cluster Size | Smallest Cluster Size |
|---|---|---|---|---|
| 2 | 13 | 11 | 8664 | 2 |
| 3 | 117 | 125 | 7342 | 3 |
| 4 | 131 | 260 | 7341 | 4 |
| 5 | 512 | 1808 | 774 | 5 |
| 10 | 245 | 3119 | 94 | 10 |