Intelegentny_Pszczelarz/.venv/Lib/site-packages/sklearn/cluster/tests/test_optics.py

# Authors: Shane Grigsby <refuge@rocktalus.com>
#          Adrin Jalali <adrin.jalali@gmail.com>
# License: BSD 3 clause
import numpy as np
import pytest
from scipy import sparse
import warnings

from sklearn.datasets import make_blobs
from sklearn.cluster import OPTICS
from sklearn.cluster._optics import _extend_region, _extract_xi_labels
from sklearn.exceptions import DataConversionWarning
from sklearn.metrics.cluster import contingency_matrix
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.cluster import DBSCAN
from sklearn.utils import shuffle
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_allclose
from sklearn.exceptions import EfficiencyWarning
from sklearn.cluster.tests.common import generate_clustered_data


rng = np.random.RandomState(0)
n_points_per_cluster = 10
C1 = [-5, -2] + 0.8 * rng.randn(n_points_per_cluster, 2)
C2 = [4, -1] + 0.1 * rng.randn(n_points_per_cluster, 2)
C3 = [1, -2] + 0.2 * rng.randn(n_points_per_cluster, 2)
C4 = [-2, 3] + 0.3 * rng.randn(n_points_per_cluster, 2)
C5 = [3, -2] + 1.6 * rng.randn(n_points_per_cluster, 2)
C6 = [5, 6] + 2 * rng.randn(n_points_per_cluster, 2)
X = np.vstack((C1, C2, C3, C4, C5, C6))


@pytest.mark.parametrize(
    ("r_plot", "end"),
    [
        [[10, 8.9, 8.8, 8.7, 7, 10], 3],
        [[10, 8.9, 8.8, 8.7, 8.6, 7, 10], 0],
        [[10, 8.9, 8.8, 8.7, 7, 6, np.inf], 4],
        [[10, 8.9, 8.8, 8.7, 7, 6, np.inf], 4],
    ],
)
def test_extend_downward(r_plot, end):
    r_plot = np.array(r_plot)
    ratio = r_plot[:-1] / r_plot[1:]
    steep_downward = ratio >= 1 / 0.9
    upward = ratio < 1

    e = _extend_region(steep_downward, upward, 0, 2)
    assert e == end


@pytest.mark.parametrize(
    ("r_plot", "end"),
    [
        [[1, 2, 2.1, 2.2, 4, 8, 8, np.inf], 6],
        [[1, 2, 2.1, 2.2, 2.3, 4, 8, 8, np.inf], 0],
        [[1, 2, 2.1, 2, np.inf], 0],
        [[1, 2, 2.1, np.inf], 2],
    ],
)
def test_extend_upward(r_plot, end):
    r_plot = np.array(r_plot)
    ratio = r_plot[:-1] / r_plot[1:]
    steep_upward = ratio <= 0.9
    downward = ratio > 1

    e = _extend_region(steep_upward, downward, 0, 2)
    assert e == end


@pytest.mark.parametrize(
    ("ordering", "clusters", "expected"),
    [
        [[0, 1, 2, 3], [[0, 1], [2, 3]], [0, 0, 1, 1]],
        [[0, 1, 2, 3], [[0, 1], [3, 3]], [0, 0, -1, 1]],
        [[0, 1, 2, 3], [[0, 1], [3, 3], [0, 3]], [0, 0, -1, 1]],
        [[3, 1, 2, 0], [[0, 1], [3, 3], [0, 3]], [1, 0, -1, 0]],
    ],
)
def test_the_extract_xi_labels(ordering, clusters, expected):
    labels = _extract_xi_labels(ordering, clusters)

    assert_array_equal(labels, expected)


def test_extract_xi(global_dtype):
    # small and easy test (no clusters around other clusters)
    # but with a clear noise data.
    rng = np.random.RandomState(0)
    n_points_per_cluster = 5

    C1 = [-5, -2] + 0.8 * rng.randn(n_points_per_cluster, 2)
    C2 = [4, -1] + 0.1 * rng.randn(n_points_per_cluster, 2)
    C3 = [1, -2] + 0.2 * rng.randn(n_points_per_cluster, 2)
    C4 = [-2, 3] + 0.3 * rng.randn(n_points_per_cluster, 2)
    C5 = [3, -2] + 0.6 * rng.randn(n_points_per_cluster, 2)
    C6 = [5, 6] + 0.2 * rng.randn(n_points_per_cluster, 2)

    X = np.vstack((C1, C2, C3, C4, C5, np.array([[100, 100]]), C6)).astype(
        global_dtype, copy=False
    )
    expected_labels = np.r_[[2] * 5, [0] * 5, [1] * 5, [3] * 5, [1] * 5, -1, [4] * 5]
    X, expected_labels = shuffle(X, expected_labels, random_state=rng)

    clust = OPTICS(
        min_samples=3, min_cluster_size=2, max_eps=20, cluster_method="xi", xi=0.4
    ).fit(X)
    assert_array_equal(clust.labels_, expected_labels)

    # check float min_samples and min_cluster_size
    clust = OPTICS(
        min_samples=0.1, min_cluster_size=0.08, max_eps=20, cluster_method="xi", xi=0.4
    ).fit(X)
    assert_array_equal(clust.labels_, expected_labels)

    X = np.vstack((C1, C2, C3, C4, C5, np.array([[100, 100]] * 2), C6)).astype(
        global_dtype, copy=False
    )
    expected_labels = np.r_[
        [1] * 5, [3] * 5, [2] * 5, [0] * 5, [2] * 5, -1, -1, [4] * 5
    ]
    X, expected_labels = shuffle(X, expected_labels, random_state=rng)

    clust = OPTICS(
        min_samples=3, min_cluster_size=3, max_eps=20, cluster_method="xi", xi=0.3
    ).fit(X)
    # this may fail if the predecessor correction is not at work!
    assert_array_equal(clust.labels_, expected_labels)

    C1 = [[0, 0], [0, 0.1], [0, -0.1], [0.1, 0]]
    C2 = [[10, 10], [10, 9], [10, 11], [9, 10]]
    C3 = [[100, 100], [100, 90], [100, 110], [90, 100]]
    X = np.vstack((C1, C2, C3)).astype(global_dtype, copy=False)
    expected_labels = np.r_[[0] * 4, [1] * 4, [2] * 4]
    X, expected_labels = shuffle(X, expected_labels, random_state=rng)

    clust = OPTICS(
        min_samples=2, min_cluster_size=2, max_eps=np.inf, cluster_method="xi", xi=0.04
    ).fit(X)
    assert_array_equal(clust.labels_, expected_labels)


def test_cluster_hierarchy_(global_dtype):
    rng = np.random.RandomState(0)
    n_points_per_cluster = 100
    C1 = [0, 0] + 2 * rng.randn(n_points_per_cluster, 2).astype(
        global_dtype, copy=False
    )
    C2 = [0, 0] + 50 * rng.randn(n_points_per_cluster, 2).astype(
        global_dtype, copy=False
    )
    X = np.vstack((C1, C2))
    X = shuffle(X, random_state=0)

    clusters = OPTICS(min_samples=20, xi=0.1).fit(X).cluster_hierarchy_
    assert clusters.shape == (2, 2)
    diff = np.sum(clusters - np.array([[0, 99], [0, 199]]))
    assert diff / len(X) < 0.05


@pytest.mark.parametrize(
    "metric, is_sparse",
    [["minkowski", False], ["euclidean", True]],
)
def test_correct_number_of_clusters(metric, is_sparse):
    # in 'auto' mode

    n_clusters = 3
    X = generate_clustered_data(n_clusters=n_clusters)
    # Parameters chosen specifically for this task.
    # Compute OPTICS
    clust = OPTICS(max_eps=5.0 * 6.0, min_samples=4, xi=0.1, metric=metric)
    clust.fit(sparse.csr_matrix(X) if is_sparse else X)
    # number of clusters, ignoring noise if present
    n_clusters_1 = len(set(clust.labels_)) - int(-1 in clust.labels_)
    assert n_clusters_1 == n_clusters

    # check attribute types and sizes
    assert clust.labels_.shape == (len(X),)
    assert clust.labels_.dtype.kind == "i"

    assert clust.reachability_.shape == (len(X),)
    assert clust.reachability_.dtype.kind == "f"

    assert clust.core_distances_.shape == (len(X),)
    assert clust.core_distances_.dtype.kind == "f"

    assert clust.ordering_.shape == (len(X),)
    assert clust.ordering_.dtype.kind == "i"
    assert set(clust.ordering_) == set(range(len(X)))


def test_minimum_number_of_sample_check():
    # test that we check a minimum number of samples
    msg = "min_samples must be no greater than"

    # Compute OPTICS
    X = [[1, 1]]
    clust = OPTICS(max_eps=5.0 * 0.3, min_samples=10, min_cluster_size=1)

    # Run the fit
    with pytest.raises(ValueError, match=msg):
        clust.fit(X)


def test_bad_extract():
    # Test an extraction of eps too close to original eps
    msg = "Specify an epsilon smaller than 0.15. Got 0.3."
    centers = [[1, 1], [-1, -1], [1, -1]]
    X, labels_true = make_blobs(
        n_samples=750, centers=centers, cluster_std=0.4, random_state=0
    )

    # Compute OPTICS
    clust = OPTICS(max_eps=5.0 * 0.03, cluster_method="dbscan", eps=0.3, min_samples=10)
    with pytest.raises(ValueError, match=msg):
        clust.fit(X)


def test_bad_reachability():
    msg = "All reachability values are inf. Set a larger max_eps."
    centers = [[1, 1], [-1, -1], [1, -1]]
    X, labels_true = make_blobs(
        n_samples=750, centers=centers, cluster_std=0.4, random_state=0
    )

    with pytest.warns(UserWarning, match=msg):
        clust = OPTICS(max_eps=5.0 * 0.003, min_samples=10, eps=0.015)
        clust.fit(X)


def test_nowarn_if_metric_bool_data_bool():
    # make sure no warning is raised if metric and data are both boolean
    # non-regression test for
    # https://github.com/scikit-learn/scikit-learn/issues/18996

    pairwise_metric = "rogerstanimoto"
    X = np.random.randint(2, size=(5, 2), dtype=bool)

    with warnings.catch_warnings():
        warnings.simplefilter("error", DataConversionWarning)

        OPTICS(metric=pairwise_metric).fit(X)


def test_warn_if_metric_bool_data_no_bool():
    # make sure a *single* conversion warning is raised if metric is boolean
    # but data isn't
    # non-regression test for
    # https://github.com/scikit-learn/scikit-learn/issues/18996

    pairwise_metric = "rogerstanimoto"
    X = np.random.randint(2, size=(5, 2), dtype=np.int32)
    msg = f"Data will be converted to boolean for metric {pairwise_metric}"

    with pytest.warns(DataConversionWarning, match=msg) as warn_record:
        OPTICS(metric=pairwise_metric).fit(X)
        assert len(warn_record) == 1


def test_nowarn_if_metric_no_bool():
    # make sure no conversion warning is raised if
    # metric isn't boolean, no matter what the data type is
    pairwise_metric = "minkowski"
    X_bool = np.random.randint(2, size=(5, 2), dtype=bool)
    X_num = np.random.randint(2, size=(5, 2), dtype=np.int32)

    with warnings.catch_warnings():
        warnings.simplefilter("error", DataConversionWarning)

        # fit boolean data
        OPTICS(metric=pairwise_metric).fit(X_bool)
        # fit numeric data
        OPTICS(metric=pairwise_metric).fit(X_num)


def test_close_extract():
    # Test extract where extraction eps is close to scaled max_eps

    centers = [[1, 1], [-1, -1], [1, -1]]
    X, labels_true = make_blobs(
        n_samples=750, centers=centers, cluster_std=0.4, random_state=0
    )

    # Compute OPTICS
    clust = OPTICS(max_eps=1.0, cluster_method="dbscan", eps=0.3, min_samples=10).fit(X)
    # Cluster ordering starts at 0; max cluster label = 2 is 3 clusters
    assert max(clust.labels_) == 2


@pytest.mark.parametrize("eps", [0.1, 0.3, 0.5])
@pytest.mark.parametrize("min_samples", [3, 10, 20])
@pytest.mark.parametrize(
    "metric, is_sparse",
    [["minkowski", False], ["euclidean", False], ["euclidean", True]],
)
def test_dbscan_optics_parity(eps, min_samples, metric, is_sparse, global_dtype):
    # Test that OPTICS clustering labels are <= 5% difference of DBSCAN

    centers = [[1, 1], [-1, -1], [1, -1]]
    X, labels_true = make_blobs(
        n_samples=750, centers=centers, cluster_std=0.4, random_state=0
    )
    X = sparse.csr_matrix(X) if is_sparse else X

    X = X.astype(global_dtype, copy=False)

    # calculate optics with dbscan extract at 0.3 epsilon
    op = OPTICS(
        min_samples=min_samples, cluster_method="dbscan", eps=eps, metric=metric
    ).fit(X)

    # calculate dbscan labels
    db = DBSCAN(eps=eps, min_samples=min_samples).fit(X)

    contingency = contingency_matrix(db.labels_, op.labels_)
    agree = min(
        np.sum(np.max(contingency, axis=0)), np.sum(np.max(contingency, axis=1))
    )
    disagree = X.shape[0] - agree

    percent_mismatch = np.round((disagree - 1) / X.shape[0], 2)

    # verify label mismatch is <= 5% labels
    assert percent_mismatch <= 0.05


def test_min_samples_edge_case(global_dtype):
    C1 = [[0, 0], [0, 0.1], [0, -0.1]]
    C2 = [[10, 10], [10, 9], [10, 11]]
    C3 = [[100, 100], [100, 96], [100, 106]]
    X = np.vstack((C1, C2, C3)).astype(global_dtype, copy=False)

    expected_labels = np.r_[[0] * 3, [1] * 3, [2] * 3]
    clust = OPTICS(min_samples=3, max_eps=7, cluster_method="xi", xi=0.04).fit(X)
    assert_array_equal(clust.labels_, expected_labels)

    expected_labels = np.r_[[0] * 3, [1] * 3, [-1] * 3]
    clust = OPTICS(min_samples=3, max_eps=3, cluster_method="xi", xi=0.04).fit(X)
    assert_array_equal(clust.labels_, expected_labels)

    expected_labels = np.r_[[-1] * 9]
    with pytest.warns(UserWarning, match="All reachability values"):
        clust = OPTICS(min_samples=4, max_eps=3, cluster_method="xi", xi=0.04).fit(X)
        assert_array_equal(clust.labels_, expected_labels)


# try arbitrary minimum sizes
@pytest.mark.parametrize("min_cluster_size", range(2, X.shape[0] // 10, 23))
def test_min_cluster_size(min_cluster_size, global_dtype):
    redX = X[::2].astype(global_dtype, copy=False)  # reduce for speed
    clust = OPTICS(min_samples=9, min_cluster_size=min_cluster_size).fit(redX)
    cluster_sizes = np.bincount(clust.labels_[clust.labels_ != -1])
    if cluster_sizes.size:
        assert min(cluster_sizes) >= min_cluster_size
    # check behaviour is the same when min_cluster_size is a fraction
    clust_frac = OPTICS(
        min_samples=9,
        min_cluster_size=min_cluster_size / redX.shape[0],
    )
    clust_frac.fit(redX)
    assert_array_equal(clust.labels_, clust_frac.labels_)


def test_min_cluster_size_invalid2():
    clust = OPTICS(min_cluster_size=len(X) + 1)
    with pytest.raises(ValueError, match="must be no greater than the "):
        clust.fit(X)

    clust = OPTICS(min_cluster_size=len(X) + 1, metric="euclidean")
    with pytest.raises(ValueError, match="must be no greater than the "):
        clust.fit(sparse.csr_matrix(X))


def test_processing_order():
    # Ensure that we consider all unprocessed points,
    # not only direct neighbors. when picking the next point.
    Y = [[0], [10], [-10], [25]]

    clust = OPTICS(min_samples=3, max_eps=15).fit(Y)
    assert_array_equal(clust.reachability_, [np.inf, 10, 10, 15])
    assert_array_equal(clust.core_distances_, [10, 15, np.inf, np.inf])
    assert_array_equal(clust.ordering_, [0, 1, 2, 3])


def test_compare_to_ELKI():
    # Expected values, computed with (future) ELKI 0.7.5 using:
    # java -jar elki.jar cli -dbc.in csv -dbc.filter FixedDBIDsFilter
    #   -algorithm clustering.optics.OPTICSHeap -optics.minpts 5
    # where the FixedDBIDsFilter gives 0-indexed ids.
    r1 = [
        np.inf,
        1.0574896366427478,
        0.7587934993548423,
        0.7290174038973836,
        0.7290174038973836,
        0.7290174038973836,
        0.6861627576116127,
        0.7587934993548423,
        0.9280118450166668,
        1.1748022534146194,
        3.3355455741292257,
        0.49618389254482587,
        0.2552805046961355,
        0.2552805046961355,
        0.24944622248445714,
        0.24944622248445714,
        0.24944622248445714,
        0.2552805046961355,
        0.2552805046961355,
        0.3086779122185853,
        4.163024452756142,
        1.623152630340929,
        0.45315840475822655,
        0.25468325192031926,
        0.2254004358159971,
        0.18765711877083036,
        0.1821471333893275,
        0.1821471333893275,
        0.18765711877083036,
        0.18765711877083036,
        0.2240202988740153,
        1.154337614548715,
        1.342604473837069,
        1.323308536402633,
        0.8607514948648837,
        0.27219111215810565,
        0.13260875220533205,
        0.13260875220533205,
        0.09890587675958984,
        0.09890587675958984,
        0.13548790801634494,
        0.1575483940837384,
        0.17515137170530226,
        0.17575920159442388,
        0.27219111215810565,
        0.6101447895405373,
        1.3189208094864302,
        1.323308536402633,
        2.2509184159764577,
        2.4517810628594527,
        3.675977064404973,
        3.8264795626020365,
        2.9130735341510614,
        2.9130735341510614,
        2.9130735341510614,
        2.9130735341510614,
        2.8459300127258036,
        2.8459300127258036,
        2.8459300127258036,
        3.0321982337972537,
    ]
    o1 = [
        0,
        3,
        6,
        4,
        7,
        8,
        2,
        9,
        5,
        1,
        31,
        30,
        32,
        34,
        33,
        38,
        39,
        35,
        37,
        36,
        44,
        21,
        23,
        24,
        22,
        25,
        27,
        29,
        26,
        28,
        20,
        40,
        45,
        46,
        10,
        15,
        11,
        13,
        17,
        19,
        18,
        12,
        16,
        14,
        47,
        49,
        43,
        48,
        42,
        41,
        53,
        57,
        51,
        52,
        56,
        59,
        54,
        55,
        58,
        50,
    ]
    p1 = [
        -1,
        0,
        3,
        6,
        6,
        6,
        8,
        3,
        7,
        5,
        1,
        31,
        30,
        30,
        34,
        34,
        34,
        32,
        32,
        37,
        36,
        44,
        21,
        23,
        24,
        22,
        25,
        25,
        22,
        22,
        22,
        21,
        40,
        45,
        46,
        10,
        15,
        15,
        13,
        13,
        15,
        11,
        19,
        15,
        10,
        47,
        12,
        45,
        14,
        43,
        42,
        53,
        57,
        57,
        57,
        57,
        59,
        59,
        59,
        58,
    ]

    # Tests against known extraction array
    # Does NOT work with metric='euclidean', because sklearn euclidean has
    # worse numeric precision. 'minkowski' is slower but more accurate.
    clust1 = OPTICS(min_samples=5).fit(X)

    assert_array_equal(clust1.ordering_, np.array(o1))
    assert_array_equal(clust1.predecessor_[clust1.ordering_], np.array(p1))
    assert_allclose(clust1.reachability_[clust1.ordering_], np.array(r1))
    # ELKI currently does not print the core distances (which are not used much
    # in literature, but we can at least ensure to have this consistency:
    for i in clust1.ordering_[1:]:
        assert clust1.reachability_[i] >= clust1.core_distances_[clust1.predecessor_[i]]

    # Expected values, computed with (future) ELKI 0.7.5 using
    r2 = [
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        0.27219111215810565,
        0.13260875220533205,
        0.13260875220533205,
        0.09890587675958984,
        0.09890587675958984,
        0.13548790801634494,
        0.1575483940837384,
        0.17515137170530226,
        0.17575920159442388,
        0.27219111215810565,
        0.4928068613197889,
        np.inf,
        0.2666183922512113,
        0.18765711877083036,
        0.1821471333893275,
        0.1821471333893275,
        0.1821471333893275,
        0.18715928772277457,
        0.18765711877083036,
        0.18765711877083036,
        0.25468325192031926,
        np.inf,
        0.2552805046961355,
        0.2552805046961355,
        0.24944622248445714,
        0.24944622248445714,
        0.24944622248445714,
        0.2552805046961355,
        0.2552805046961355,
        0.3086779122185853,
        0.34466409325984865,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
    ]
    o2 = [
        0,
        1,
        2,
        3,
        4,
        5,
        6,
        7,
        8,
        9,
        10,
        15,
        11,
        13,
        17,
        19,
        18,
        12,
        16,
        14,
        47,
        46,
        20,
        22,
        25,
        23,
        27,
        29,
        24,
        26,
        28,
        21,
        30,
        32,
        34,
        33,
        38,
        39,
        35,
        37,
        36,
        31,
        40,
        41,
        42,
        43,
        44,
        45,
        48,
        49,
        50,
        51,
        52,
        53,
        54,
        55,
        56,
        57,
        58,
        59,
    ]
    p2 = [
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        10,
        15,
        15,
        13,
        13,
        15,
        11,
        19,
        15,
        10,
        47,
        -1,
        20,
        22,
        25,
        25,
        25,
        25,
        22,
        22,
        23,
        -1,
        30,
        30,
        34,
        34,
        34,
        32,
        32,
        37,
        38,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
    ]
    clust2 = OPTICS(min_samples=5, max_eps=0.5).fit(X)

    assert_array_equal(clust2.ordering_, np.array(o2))
    assert_array_equal(clust2.predecessor_[clust2.ordering_], np.array(p2))
    assert_allclose(clust2.reachability_[clust2.ordering_], np.array(r2))

    index = np.where(clust1.core_distances_ <= 0.5)[0]
    assert_allclose(clust1.core_distances_[index], clust2.core_distances_[index])


def test_extract_dbscan(global_dtype):
    # testing an easy dbscan case. Not including clusters with different
    # densities.
    rng = np.random.RandomState(0)
    n_points_per_cluster = 20
    C1 = [-5, -2] + 0.2 * rng.randn(n_points_per_cluster, 2)
    C2 = [4, -1] + 0.2 * rng.randn(n_points_per_cluster, 2)
    C3 = [1, 2] + 0.2 * rng.randn(n_points_per_cluster, 2)
    C4 = [-2, 3] + 0.2 * rng.randn(n_points_per_cluster, 2)
    X = np.vstack((C1, C2, C3, C4)).astype(global_dtype, copy=False)

    clust = OPTICS(cluster_method="dbscan", eps=0.5).fit(X)
    assert_array_equal(np.sort(np.unique(clust.labels_)), [0, 1, 2, 3])


@pytest.mark.parametrize("is_sparse", [False, True])
def test_precomputed_dists(is_sparse, global_dtype):
    redX = X[::2].astype(global_dtype, copy=False)
    dists = pairwise_distances(redX, metric="euclidean")
    dists = sparse.csr_matrix(dists) if is_sparse else dists
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", EfficiencyWarning)
        clust1 = OPTICS(min_samples=10, algorithm="brute", metric="precomputed").fit(
            dists
        )
    clust2 = OPTICS(min_samples=10, algorithm="brute", metric="euclidean").fit(redX)

    assert_allclose(clust1.reachability_, clust2.reachability_)
    assert_array_equal(clust1.labels_, clust2.labels_)
feature: "ANN commit 2" 2023-06-19 00:49:18 +02:00			`# Authors: Shane Grigsby <refuge@rocktalus.com>`
			`# Adrin Jalali <adrin.jalali@gmail.com>`
			`# License: BSD 3 clause`
			`import numpy as np`
			`import pytest`
			`from scipy import sparse`
			`import warnings`

			`from sklearn.datasets import make_blobs`
			`from sklearn.cluster import OPTICS`
			`from sklearn.cluster._optics import _extend_region, _extract_xi_labels`
			`from sklearn.exceptions import DataConversionWarning`
			`from sklearn.metrics.cluster import contingency_matrix`
			`from sklearn.metrics.pairwise import pairwise_distances`
			`from sklearn.cluster import DBSCAN`
			`from sklearn.utils import shuffle`
			`from sklearn.utils._testing import assert_array_equal`
			`from sklearn.utils._testing import assert_allclose`
			`from sklearn.exceptions import EfficiencyWarning`
			`from sklearn.cluster.tests.common import generate_clustered_data`


			`rng = np.random.RandomState(0)`
			`n_points_per_cluster = 10`
			`C1 = [-5, -2] + 0.8 * rng.randn(n_points_per_cluster, 2)`
			`C2 = [4, -1] + 0.1 * rng.randn(n_points_per_cluster, 2)`
			`C3 = [1, -2] + 0.2 * rng.randn(n_points_per_cluster, 2)`
			`C4 = [-2, 3] + 0.3 * rng.randn(n_points_per_cluster, 2)`
			`C5 = [3, -2] + 1.6 * rng.randn(n_points_per_cluster, 2)`
			`C6 = [5, 6] + 2 * rng.randn(n_points_per_cluster, 2)`
			`X = np.vstack((C1, C2, C3, C4, C5, C6))`


			`@pytest.mark.parametrize(`
			`("r_plot", "end"),`
			`[`
			`[[10, 8.9, 8.8, 8.7, 7, 10], 3],`
			`[[10, 8.9, 8.8, 8.7, 8.6, 7, 10], 0],`
			`[[10, 8.9, 8.8, 8.7, 7, 6, np.inf], 4],`
			`[[10, 8.9, 8.8, 8.7, 7, 6, np.inf], 4],`
			`],`
			`)`
			`def test_extend_downward(r_plot, end):`
			`r_plot = np.array(r_plot)`
			`ratio = r_plot[:-1] / r_plot[1:]`
			`steep_downward = ratio >= 1 / 0.9`
			`upward = ratio < 1`

			`e = _extend_region(steep_downward, upward, 0, 2)`
			`assert e == end`


			`@pytest.mark.parametrize(`
			`("r_plot", "end"),`
			`[`
			`[[1, 2, 2.1, 2.2, 4, 8, 8, np.inf], 6],`
			`[[1, 2, 2.1, 2.2, 2.3, 4, 8, 8, np.inf], 0],`
			`[[1, 2, 2.1, 2, np.inf], 0],`
			`[[1, 2, 2.1, np.inf], 2],`
			`],`
			`)`
			`def test_extend_upward(r_plot, end):`
			`r_plot = np.array(r_plot)`
			`ratio = r_plot[:-1] / r_plot[1:]`
			`steep_upward = ratio <= 0.9`
			`downward = ratio > 1`

			`e = _extend_region(steep_upward, downward, 0, 2)`
			`assert e == end`


			`@pytest.mark.parametrize(`
			`("ordering", "clusters", "expected"),`
			`[`
			`[[0, 1, 2, 3], [[0, 1], [2, 3]], [0, 0, 1, 1]],`
			`[[0, 1, 2, 3], [[0, 1], [3, 3]], [0, 0, -1, 1]],`
			`[[0, 1, 2, 3], [[0, 1], [3, 3], [0, 3]], [0, 0, -1, 1]],`
			`[[3, 1, 2, 0], [[0, 1], [3, 3], [0, 3]], [1, 0, -1, 0]],`
			`],`
			`)`
			`def test_the_extract_xi_labels(ordering, clusters, expected):`
			`labels = _extract_xi_labels(ordering, clusters)`

			`assert_array_equal(labels, expected)`


			`def test_extract_xi(global_dtype):`
			`# small and easy test (no clusters around other clusters)`
			`# but with a clear noise data.`
			`rng = np.random.RandomState(0)`
			`n_points_per_cluster = 5`

			`C1 = [-5, -2] + 0.8 * rng.randn(n_points_per_cluster, 2)`
			`C2 = [4, -1] + 0.1 * rng.randn(n_points_per_cluster, 2)`
			`C3 = [1, -2] + 0.2 * rng.randn(n_points_per_cluster, 2)`
			`C4 = [-2, 3] + 0.3 * rng.randn(n_points_per_cluster, 2)`
			`C5 = [3, -2] + 0.6 * rng.randn(n_points_per_cluster, 2)`
			`C6 = [5, 6] + 0.2 * rng.randn(n_points_per_cluster, 2)`

			`X = np.vstack((C1, C2, C3, C4, C5, np.array([[100, 100]]), C6)).astype(`
			`global_dtype, copy=False`
			`)`
			`expected_labels = np.r_[[2] * 5, [0] * 5, [1] * 5, [3] * 5, [1] * 5, -1, [4] * 5]`
			`X, expected_labels = shuffle(X, expected_labels, random_state=rng)`

			`clust = OPTICS(`
			`min_samples=3, min_cluster_size=2, max_eps=20, cluster_method="xi", xi=0.4`
			`).fit(X)`
			`assert_array_equal(clust.labels_, expected_labels)`

			`# check float min_samples and min_cluster_size`
			`clust = OPTICS(`
			`min_samples=0.1, min_cluster_size=0.08, max_eps=20, cluster_method="xi", xi=0.4`
			`).fit(X)`
			`assert_array_equal(clust.labels_, expected_labels)`

			`X = np.vstack((C1, C2, C3, C4, C5, np.array([[100, 100]] * 2), C6)).astype(`
			`global_dtype, copy=False`
			`)`
			`expected_labels = np.r_[`
			`[1] * 5, [3] * 5, [2] * 5, [0] * 5, [2] * 5, -1, -1, [4] * 5`
			`]`
			`X, expected_labels = shuffle(X, expected_labels, random_state=rng)`

			`clust = OPTICS(`
			`min_samples=3, min_cluster_size=3, max_eps=20, cluster_method="xi", xi=0.3`
			`).fit(X)`
			`# this may fail if the predecessor correction is not at work!`
			`assert_array_equal(clust.labels_, expected_labels)`

			`C1 = [[0, 0], [0, 0.1], [0, -0.1], [0.1, 0]]`
			`C2 = [[10, 10], [10, 9], [10, 11], [9, 10]]`
			`C3 = [[100, 100], [100, 90], [100, 110], [90, 100]]`
			`X = np.vstack((C1, C2, C3)).astype(global_dtype, copy=False)`
			`expected_labels = np.r_[[0] * 4, [1] * 4, [2] * 4]`
			`X, expected_labels = shuffle(X, expected_labels, random_state=rng)`

			`clust = OPTICS(`
			`min_samples=2, min_cluster_size=2, max_eps=np.inf, cluster_method="xi", xi=0.04`
			`).fit(X)`
			`assert_array_equal(clust.labels_, expected_labels)`


			`def test_cluster_hierarchy_(global_dtype):`
			`rng = np.random.RandomState(0)`
			`n_points_per_cluster = 100`
			`C1 = [0, 0] + 2 * rng.randn(n_points_per_cluster, 2).astype(`
			`global_dtype, copy=False`
			`)`
			`C2 = [0, 0] + 50 * rng.randn(n_points_per_cluster, 2).astype(`
			`global_dtype, copy=False`
			`)`
			`X = np.vstack((C1, C2))`
			`X = shuffle(X, random_state=0)`

			`clusters = OPTICS(min_samples=20, xi=0.1).fit(X).cluster_hierarchy_`
			`assert clusters.shape == (2, 2)`
			`diff = np.sum(clusters - np.array([[0, 99], [0, 199]]))`
			`assert diff / len(X) < 0.05`


			`@pytest.mark.parametrize(`
			`"metric, is_sparse",`
			`[["minkowski", False], ["euclidean", True]],`
			`)`
			`def test_correct_number_of_clusters(metric, is_sparse):`
			`# in 'auto' mode`

			`n_clusters = 3`
			`X = generate_clustered_data(n_clusters=n_clusters)`
			`# Parameters chosen specifically for this task.`
			`# Compute OPTICS`
			`clust = OPTICS(max_eps=5.0 * 6.0, min_samples=4, xi=0.1, metric=metric)`
			`clust.fit(sparse.csr_matrix(X) if is_sparse else X)`
			`# number of clusters, ignoring noise if present`
			`n_clusters_1 = len(set(clust.labels_)) - int(-1 in clust.labels_)`
			`assert n_clusters_1 == n_clusters`

			`# check attribute types and sizes`
			`assert clust.labels_.shape == (len(X),)`
			`assert clust.labels_.dtype.kind == "i"`

			`assert clust.reachability_.shape == (len(X),)`
			`assert clust.reachability_.dtype.kind == "f"`

			`assert clust.core_distances_.shape == (len(X),)`
			`assert clust.core_distances_.dtype.kind == "f"`

			`assert clust.ordering_.shape == (len(X),)`
			`assert clust.ordering_.dtype.kind == "i"`
			`assert set(clust.ordering_) == set(range(len(X)))`


			`def test_minimum_number_of_sample_check():`
			`# test that we check a minimum number of samples`
			`msg = "min_samples must be no greater than"`

			`# Compute OPTICS`
			`X = [[1, 1]]`
			`clust = OPTICS(max_eps=5.0 * 0.3, min_samples=10, min_cluster_size=1)`

			`# Run the fit`
			`with pytest.raises(ValueError, match=msg):`
			`clust.fit(X)`


			`def test_bad_extract():`
			`# Test an extraction of eps too close to original eps`
			`msg = "Specify an epsilon smaller than 0.15. Got 0.3."`
			`centers = [[1, 1], [-1, -1], [1, -1]]`
			`X, labels_true = make_blobs(`
			`n_samples=750, centers=centers, cluster_std=0.4, random_state=0`
			`)`

			`# Compute OPTICS`
			`clust = OPTICS(max_eps=5.0 * 0.03, cluster_method="dbscan", eps=0.3, min_samples=10)`
			`with pytest.raises(ValueError, match=msg):`
			`clust.fit(X)`


			`def test_bad_reachability():`
			`msg = "All reachability values are inf. Set a larger max_eps."`
			`centers = [[1, 1], [-1, -1], [1, -1]]`
			`X, labels_true = make_blobs(`
			`n_samples=750, centers=centers, cluster_std=0.4, random_state=0`
			`)`

			`with pytest.warns(UserWarning, match=msg):`
			`clust = OPTICS(max_eps=5.0 * 0.003, min_samples=10, eps=0.015)`
			`clust.fit(X)`


			`def test_nowarn_if_metric_bool_data_bool():`
			`# make sure no warning is raised if metric and data are both boolean`
			`# non-regression test for`
			`# https://github.com/scikit-learn/scikit-learn/issues/18996`

			`pairwise_metric = "rogerstanimoto"`
			`X = np.random.randint(2, size=(5, 2), dtype=bool)`

			`with warnings.catch_warnings():`
			`warnings.simplefilter("error", DataConversionWarning)`

			`OPTICS(metric=pairwise_metric).fit(X)`


			`def test_warn_if_metric_bool_data_no_bool():`
			`# make sure a single conversion warning is raised if metric is boolean`
			`# but data isn't`
			`# non-regression test for`
			`# https://github.com/scikit-learn/scikit-learn/issues/18996`

			`pairwise_metric = "rogerstanimoto"`
			`X = np.random.randint(2, size=(5, 2), dtype=np.int32)`
			`msg = f"Data will be converted to boolean for metric {pairwise_metric}"`

			`with pytest.warns(DataConversionWarning, match=msg) as warn_record:`
			`OPTICS(metric=pairwise_metric).fit(X)`
			`assert len(warn_record) == 1`


			`def test_nowarn_if_metric_no_bool():`
			`# make sure no conversion warning is raised if`
			`# metric isn't boolean, no matter what the data type is`
			`pairwise_metric = "minkowski"`
			`X_bool = np.random.randint(2, size=(5, 2), dtype=bool)`
			`X_num = np.random.randint(2, size=(5, 2), dtype=np.int32)`

			`with warnings.catch_warnings():`
			`warnings.simplefilter("error", DataConversionWarning)`

			`# fit boolean data`
			`OPTICS(metric=pairwise_metric).fit(X_bool)`
			`# fit numeric data`
			`OPTICS(metric=pairwise_metric).fit(X_num)`


			`def test_close_extract():`
			`# Test extract where extraction eps is close to scaled max_eps`

			`centers = [[1, 1], [-1, -1], [1, -1]]`
			`X, labels_true = make_blobs(`
			`n_samples=750, centers=centers, cluster_std=0.4, random_state=0`
			`)`

			`# Compute OPTICS`
			`clust = OPTICS(max_eps=1.0, cluster_method="dbscan", eps=0.3, min_samples=10).fit(X)`
			`# Cluster ordering starts at 0; max cluster label = 2 is 3 clusters`
			`assert max(clust.labels_) == 2`


			`@pytest.mark.parametrize("eps", [0.1, 0.3, 0.5])`
			`@pytest.mark.parametrize("min_samples", [3, 10, 20])`
			`@pytest.mark.parametrize(`
			`"metric, is_sparse",`
			`[["minkowski", False], ["euclidean", False], ["euclidean", True]],`
			`)`
			`def test_dbscan_optics_parity(eps, min_samples, metric, is_sparse, global_dtype):`
			`# Test that OPTICS clustering labels are <= 5% difference of DBSCAN`

			`centers = [[1, 1], [-1, -1], [1, -1]]`
			`X, labels_true = make_blobs(`
			`n_samples=750, centers=centers, cluster_std=0.4, random_state=0`
			`)`
			`X = sparse.csr_matrix(X) if is_sparse else X`

			`X = X.astype(global_dtype, copy=False)`

			`# calculate optics with dbscan extract at 0.3 epsilon`
			`op = OPTICS(`
			`min_samples=min_samples, cluster_method="dbscan", eps=eps, metric=metric`
			`).fit(X)`

			`# calculate dbscan labels`
			`db = DBSCAN(eps=eps, min_samples=min_samples).fit(X)`

			`contingency = contingency_matrix(db.labels_, op.labels_)`
			`agree = min(`
			`np.sum(np.max(contingency, axis=0)), np.sum(np.max(contingency, axis=1))`
			`)`
			`disagree = X.shape[0] - agree`

			`percent_mismatch = np.round((disagree - 1) / X.shape[0], 2)`

			`# verify label mismatch is <= 5% labels`
			`assert percent_mismatch <= 0.05`


			`def test_min_samples_edge_case(global_dtype):`
			`C1 = [[0, 0], [0, 0.1], [0, -0.1]]`
			`C2 = [[10, 10], [10, 9], [10, 11]]`
			`C3 = [[100, 100], [100, 96], [100, 106]]`
			`X = np.vstack((C1, C2, C3)).astype(global_dtype, copy=False)`

			`expected_labels = np.r_[[0] * 3, [1] * 3, [2] * 3]`
			`clust = OPTICS(min_samples=3, max_eps=7, cluster_method="xi", xi=0.04).fit(X)`
			`assert_array_equal(clust.labels_, expected_labels)`

			`expected_labels = np.r_[[0] * 3, [1] * 3, [-1] * 3]`
			`clust = OPTICS(min_samples=3, max_eps=3, cluster_method="xi", xi=0.04).fit(X)`
			`assert_array_equal(clust.labels_, expected_labels)`

			`expected_labels = np.r_[[-1] * 9]`
			`with pytest.warns(UserWarning, match="All reachability values"):`
			`clust = OPTICS(min_samples=4, max_eps=3, cluster_method="xi", xi=0.04).fit(X)`
			`assert_array_equal(clust.labels_, expected_labels)`


			`# try arbitrary minimum sizes`
			`@pytest.mark.parametrize("min_cluster_size", range(2, X.shape[0] // 10, 23))`
			`def test_min_cluster_size(min_cluster_size, global_dtype):`
			`redX = X[::2].astype(global_dtype, copy=False) # reduce for speed`
			`clust = OPTICS(min_samples=9, min_cluster_size=min_cluster_size).fit(redX)`
			`cluster_sizes = np.bincount(clust.labels_[clust.labels_ != -1])`
			`if cluster_sizes.size:`
			`assert min(cluster_sizes) >= min_cluster_size`
			`# check behaviour is the same when min_cluster_size is a fraction`
			`clust_frac = OPTICS(`
			`min_samples=9,`
			`min_cluster_size=min_cluster_size / redX.shape[0],`
			`)`
			`clust_frac.fit(redX)`
			`assert_array_equal(clust.labels_, clust_frac.labels_)`


			`def test_min_cluster_size_invalid2():`
			`clust = OPTICS(min_cluster_size=len(X) + 1)`
			`with pytest.raises(ValueError, match="must be no greater than the "):`
			`clust.fit(X)`

			`clust = OPTICS(min_cluster_size=len(X) + 1, metric="euclidean")`
			`with pytest.raises(ValueError, match="must be no greater than the "):`
			`clust.fit(sparse.csr_matrix(X))`


			`def test_processing_order():`
			`# Ensure that we consider all unprocessed points,`
			`# not only direct neighbors. when picking the next point.`
			`Y = [[0], [10], [-10], [25]]`

			`clust = OPTICS(min_samples=3, max_eps=15).fit(Y)`
			`assert_array_equal(clust.reachability_, [np.inf, 10, 10, 15])`
			`assert_array_equal(clust.core_distances_, [10, 15, np.inf, np.inf])`
			`assert_array_equal(clust.ordering_, [0, 1, 2, 3])`


			`def test_compare_to_ELKI():`
			`# Expected values, computed with (future) ELKI 0.7.5 using:`
			`# java -jar elki.jar cli -dbc.in csv -dbc.filter FixedDBIDsFilter`
			`# -algorithm clustering.optics.OPTICSHeap -optics.minpts 5`
			`# where the FixedDBIDsFilter gives 0-indexed ids.`
			`r1 = [`
			`np.inf,`
			`1.0574896366427478,`
			`0.7587934993548423,`
			`0.7290174038973836,`
			`0.7290174038973836,`
			`0.7290174038973836,`
			`0.6861627576116127,`
			`0.7587934993548423,`
			`0.9280118450166668,`
			`1.1748022534146194,`
			`3.3355455741292257,`
			`0.49618389254482587,`
			`0.2552805046961355,`
			`0.2552805046961355,`
			`0.24944622248445714,`
			`0.24944622248445714,`
			`0.24944622248445714,`
			`0.2552805046961355,`
			`0.2552805046961355,`
			`0.3086779122185853,`
			`4.163024452756142,`
			`1.623152630340929,`
			`0.45315840475822655,`
			`0.25468325192031926,`
			`0.2254004358159971,`
			`0.18765711877083036,`
			`0.1821471333893275,`
			`0.1821471333893275,`
			`0.18765711877083036,`
			`0.18765711877083036,`
			`0.2240202988740153,`
			`1.154337614548715,`
			`1.342604473837069,`
			`1.323308536402633,`
			`0.8607514948648837,`
			`0.27219111215810565,`
			`0.13260875220533205,`
			`0.13260875220533205,`
			`0.09890587675958984,`
			`0.09890587675958984,`
			`0.13548790801634494,`
			`0.1575483940837384,`
			`0.17515137170530226,`
			`0.17575920159442388,`
			`0.27219111215810565,`
			`0.6101447895405373,`
			`1.3189208094864302,`
			`1.323308536402633,`
			`2.2509184159764577,`
			`2.4517810628594527,`
			`3.675977064404973,`
			`3.8264795626020365,`
			`2.9130735341510614,`
			`2.9130735341510614,`
			`2.9130735341510614,`
			`2.9130735341510614,`
			`2.8459300127258036,`
			`2.8459300127258036,`
			`2.8459300127258036,`
			`3.0321982337972537,`
			`]`
			`o1 = [`
			`0,`
			`3,`
			`6,`
			`4,`
			`7,`
			`8,`
			`2,`
			`9,`
			`5,`
			`1,`
			`31,`
			`30,`
			`32,`
			`34,`
			`33,`
			`38,`
			`39,`
			`35,`
			`37,`
			`36,`
			`44,`
			`21,`
			`23,`
			`24,`
			`22,`
			`25,`
			`27,`
			`29,`
			`26,`
			`28,`
			`20,`
			`40,`
			`45,`
			`46,`
			`10,`
			`15,`
			`11,`
			`13,`
			`17,`
			`19,`
			`18,`
			`12,`
			`16,`
			`14,`
			`47,`
			`49,`
			`43,`
			`48,`
			`42,`
			`41,`
			`53,`
			`57,`
			`51,`
			`52,`
			`56,`
			`59,`
			`54,`
			`55,`
			`58,`
			`50,`
			`]`
			`p1 = [`
			`-1,`
			`0,`
			`3,`
			`6,`
			`6,`
			`6,`
			`8,`
			`3,`
			`7,`
			`5,`
			`1,`
			`31,`
			`30,`
			`30,`
			`34,`
			`34,`
			`34,`
			`32,`
			`32,`
			`37,`
			`36,`
			`44,`
			`21,`
			`23,`
			`24,`
			`22,`
			`25,`
			`25,`
			`22,`
			`22,`
			`22,`
			`21,`
			`40,`
			`45,`
			`46,`
			`10,`
			`15,`
			`15,`
			`13,`
			`13,`
			`15,`
			`11,`
			`19,`
			`15,`
			`10,`
			`47,`
			`12,`
			`45,`
			`14,`
			`43,`
			`42,`
			`53,`
			`57,`
			`57,`
			`57,`
			`57,`
			`59,`
			`59,`
			`59,`
			`58,`
			`]`

			`# Tests against known extraction array`
			`# Does NOT work with metric='euclidean', because sklearn euclidean has`
			`# worse numeric precision. 'minkowski' is slower but more accurate.`
			`clust1 = OPTICS(min_samples=5).fit(X)`

			`assert_array_equal(clust1.ordering_, np.array(o1))`
			`assert_array_equal(clust1.predecessor_[clust1.ordering_], np.array(p1))`
			`assert_allclose(clust1.reachability_[clust1.ordering_], np.array(r1))`
			`# ELKI currently does not print the core distances (which are not used much`
			`# in literature, but we can at least ensure to have this consistency:`
			`for i in clust1.ordering_[1:]:`
			`assert clust1.reachability_[i] >= clust1.core_distances_[clust1.predecessor_[i]]`

			`# Expected values, computed with (future) ELKI 0.7.5 using`
			`r2 = [`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`0.27219111215810565,`
			`0.13260875220533205,`
			`0.13260875220533205,`
			`0.09890587675958984,`
			`0.09890587675958984,`
			`0.13548790801634494,`
			`0.1575483940837384,`
			`0.17515137170530226,`
			`0.17575920159442388,`
			`0.27219111215810565,`
			`0.4928068613197889,`
			`np.inf,`
			`0.2666183922512113,`
			`0.18765711877083036,`
			`0.1821471333893275,`
			`0.1821471333893275,`
			`0.1821471333893275,`
			`0.18715928772277457,`
			`0.18765711877083036,`
			`0.18765711877083036,`
			`0.25468325192031926,`
			`np.inf,`
			`0.2552805046961355,`
			`0.2552805046961355,`
			`0.24944622248445714,`
			`0.24944622248445714,`
			`0.24944622248445714,`
			`0.2552805046961355,`
			`0.2552805046961355,`
			`0.3086779122185853,`
			`0.34466409325984865,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`np.inf,`
			`]`
			`o2 = [`
			`0,`
			`1,`
			`2,`
			`3,`
			`4,`
			`5,`
			`6,`
			`7,`
			`8,`
			`9,`
			`10,`
			`15,`
			`11,`
			`13,`
			`17,`
			`19,`
			`18,`
			`12,`
			`16,`
			`14,`
			`47,`
			`46,`
			`20,`
			`22,`
			`25,`
			`23,`
			`27,`
			`29,`
			`24,`
			`26,`
			`28,`
			`21,`
			`30,`
			`32,`
			`34,`
			`33,`
			`38,`
			`39,`
			`35,`
			`37,`
			`36,`
			`31,`
			`40,`
			`41,`
			`42,`
			`43,`
			`44,`
			`45,`
			`48,`
			`49,`
			`50,`
			`51,`
			`52,`
			`53,`
			`54,`
			`55,`
			`56,`
			`57,`
			`58,`
			`59,`
			`]`
			`p2 = [`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`10,`
			`15,`
			`15,`
			`13,`
			`13,`
			`15,`
			`11,`
			`19,`
			`15,`
			`10,`
			`47,`
			`-1,`
			`20,`
			`22,`
			`25,`
			`25,`
			`25,`
			`25,`
			`22,`
			`22,`
			`23,`
			`-1,`
			`30,`
			`30,`
			`34,`
			`34,`
			`34,`
			`32,`
			`32,`
			`37,`
			`38,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`-1,`
			`]`
			`clust2 = OPTICS(min_samples=5, max_eps=0.5).fit(X)`

			`assert_array_equal(clust2.ordering_, np.array(o2))`
			`assert_array_equal(clust2.predecessor_[clust2.ordering_], np.array(p2))`
			`assert_allclose(clust2.reachability_[clust2.ordering_], np.array(r2))`

			`index = np.where(clust1.core_distances_ <= 0.5)[0]`
			`assert_allclose(clust1.core_distances_[index], clust2.core_distances_[index])`


			`def test_extract_dbscan(global_dtype):`
			`# testing an easy dbscan case. Not including clusters with different`
			`# densities.`
			`rng = np.random.RandomState(0)`
			`n_points_per_cluster = 20`
			`C1 = [-5, -2] + 0.2 * rng.randn(n_points_per_cluster, 2)`
			`C2 = [4, -1] + 0.2 * rng.randn(n_points_per_cluster, 2)`
			`C3 = [1, 2] + 0.2 * rng.randn(n_points_per_cluster, 2)`
			`C4 = [-2, 3] + 0.2 * rng.randn(n_points_per_cluster, 2)`
			`X = np.vstack((C1, C2, C3, C4)).astype(global_dtype, copy=False)`

			`clust = OPTICS(cluster_method="dbscan", eps=0.5).fit(X)`
			`assert_array_equal(np.sort(np.unique(clust.labels_)), [0, 1, 2, 3])`


			`@pytest.mark.parametrize("is_sparse", [False, True])`
			`def test_precomputed_dists(is_sparse, global_dtype):`
			`redX = X[::2].astype(global_dtype, copy=False)`
			`dists = pairwise_distances(redX, metric="euclidean")`
			`dists = sparse.csr_matrix(dists) if is_sparse else dists`
			`with warnings.catch_warnings():`
			`warnings.simplefilter("ignore", EfficiencyWarning)`
			`clust1 = OPTICS(min_samples=10, algorithm="brute", metric="precomputed").fit(`
			`dists`
			`)`
			`clust2 = OPTICS(min_samples=10, algorithm="brute", metric="euclidean").fit(redX)`

			`assert_allclose(clust1.reachability_, clust2.reachability_)`
			`assert_array_equal(clust1.labels_, clust2.labels_)`