From 680c821937d1a9b9b2330b974ad6c62ce9a3d169 Mon Sep 17 00:00:00 2001
From: "Daniel (dB.) Doubrovkine" <dblock@amazon.com>
Date: Tue, 4 Apr 2023 15:49:27 -0400
Subject: [PATCH] Add CI with link checker. (#3584)

* Add CI with link checker.

Signed-off-by: dblock <dblock@amazon.com>

* Capture URI::InvalidURIError.

Signed-off-by: dblock <dblock@amazon.com>

* Use HEAD and catch URI errors.

Signed-off-by: dblock <dblock@amazon.com>

* Retry on a 405 with a GET.

Signed-off-by: dblock <dblock@amazon.com>

* Replaced external link checker with ruby-link-checker.

Signed-off-by: dblock <dblock@amazon.com>

* Don't exit with an exception.

Signed-off-by: dblock <dblock@amazon.com>

* Run internal link checker on build/ci.

Signed-off-by: dblock <dblock@amazon.com>

* Added broken links issue template.

Signed-off-by: dblock <dblock@amazon.com>

* Added host exclusions that 404 or fail on bots.

Signed-off-by: dblock <dblock@amazon.com>

* Raise anyway because Jekyll does it for us.

Signed-off-by: dblock <dblock@amazon.com>

* Fix broken links.

Signed-off-by: dblock <dblock@amazon.com>

* Only run link checker on main.

Signed-off-by: dblock <dblock@amazon.com>

* Re-add check-links.sh.

Signed-off-by: dblock <dblock@amazon.com>

* Run once a day on cron.

Signed-off-by: dblock <dblock@amazon.com>

---------

Signed-off-by: dblock <dblock@amazon.com>
---
 .github/ISSUE_TEMPLATE/broken_links.md        |   7 +
 .github/workflows/jekyll-build.yml            |  16 +
 .github/workflows/link-checker.yml            |  25 ++
 Gemfile                                       |   6 +-
 _api-reference/explain.md                     |   2 +-
 _clients/OSC-dot-net.md                       |   2 +-
 _config.yml                                   |   8 +-
 _dashboards/reporting.md                      |   2 +-
 .../common-use-cases/trace-analytics.md       |   2 +-
 .../configuring-log4j.md                      |   2 +-
 .../managing-data-prepper/monitoring.md       |   4 +-
 .../configuration/sources/http-source.md      |   2 +-
 .../sources/otel-metrics-source.md            |   2 +-
 .../configuration/sources/otel-trace.md       |   2 +-
 _ml-commons-plugin/algorithms.md              |   2 +-
 _observing-your-data/ad/index.md              |   2 +-
 _plugins/link-checker.rb                      | 340 +++++++++++-------
 _search-plugins/sql/cli.md                    |   2 +-
 _search-plugins/sql/sql/odbc.md               |   2 +-
 _tools/k8s-operator.md                        |   2 +-
 _tools/logstash/index.md                      |   2 +-
 _tools/logstash/read-from-opensearch.md       |   2 +-
 build.sh                                      |   4 +-
 check-links.sh                                |   8 +-
 24 files changed, 282 insertions(+), 166 deletions(-)
 create mode 100644 .github/ISSUE_TEMPLATE/broken_links.md
 create mode 100644 .github/workflows/jekyll-build.yml
 create mode 100644 .github/workflows/link-checker.yml
 mode change 100644 => 100755 check-links.sh

diff --git a/.github/ISSUE_TEMPLATE/broken_links.md b/.github/ISSUE_TEMPLATE/broken_links.md
new file mode 100644
index 00000000..f9d38758
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/broken_links.md
@@ -0,0 +1,7 @@
+---
+title: '[AUTOCUT] Broken links'
+labels: 'bug'
+---
+
+The link checker workflow has failed.
+Please examine the workflow log {{ env.WORKFLOW_URL }}.
diff --git a/.github/workflows/jekyll-build.yml b/.github/workflows/jekyll-build.yml
new file mode 100644
index 00000000..70130d48
--- /dev/null
+++ b/.github/workflows/jekyll-build.yml
@@ -0,0 +1,16 @@
+name: Jekyll Build Verification  # verifies that the Jekyll site builds
+
+on: [pull_request]  # triggered by pull requests
+
+jobs:
+  check:
+    runs-on: ubuntu-latest
+
+    steps:  # the final step builds the site with JEKYLL_LINK_CHECKER=internal
+    - uses: actions/checkout@v3
+    - uses: ruby/setup-ruby@v1
+      with:
+        ruby-version: '3.0'  # quoted so YAML keeps it a string rather than the float 3
+        bundler-cache: true
+    - run: |
+        JEKYLL_LINK_CHECKER=internal bundle exec jekyll build --future
diff --git a/.github/workflows/link-checker.yml b/.github/workflows/link-checker.yml
new file mode 100644
index 00000000..2cd3ca7c
--- /dev/null
+++ b/.github/workflows/link-checker.yml
@@ -0,0 +1,25 @@
+name: Check Links
+on:
+  workflow_dispatch:  # allows the workflow to be started manually
+  schedule:
+    - cron: "30 11 * * *"  # once a day, at 11:30
+jobs:
+  check:
+    runs-on: ubuntu-latest
+    steps:  # the build step sets JEKYLL_FATAL_LINK_CHECKER=all, so dead links fail the build
+    - uses: actions/checkout@v3
+    - uses: ruby/setup-ruby@v1
+      with:
+        ruby-version: '3.0'  # quoted so YAML keeps it a string rather than the float 3
+        bundler-cache: true
+    - run: |
+        JEKYLL_FATAL_LINK_CHECKER=all bundle exec jekyll build --future
+    - name: Create Issue On Build Failure
+      if: ${{ failure() }}  # runs only when an earlier step in this job failed
+      uses: dblock/create-a-github-issue@v3
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        WORKFLOW_URL: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+      with:
+        update_existing: true
+        filename: .github/ISSUE_TEMPLATE/broken_links.md  # issue template; its body references env.WORKFLOW_URL
diff --git a/Gemfile b/Gemfile
index ef90cc0c..cd42fda5 100644
--- a/Gemfile
+++ b/Gemfile
@@ -32,4 +32,8 @@ gem "tzinfo-data", platforms: [:mingw, :mswin, :x64_mingw, :jruby]
 gem "wdm", "~> 0.1.0" if Gem.win_platform?
 
 # Installs webrick dependency for building locally
-gem "webrick", "~> 1.7"
\ No newline at end of file
+gem "webrick", "~> 1.7"
+
+# Link checker
+gem "typhoeus"
+gem "ruby-link-checker"
\ No newline at end of file
diff --git a/_api-reference/explain.md b/_api-reference/explain.md
index 2d79baef..f7b2d6f7 100644
--- a/_api-reference/explain.md
+++ b/_api-reference/explain.md
@@ -10,7 +10,7 @@ Introduced 1.0
 
 Wondering why a specific document ranks higher (or lower) for a query? You can use the explain API for an explanation of how the relevance score (`_score`) is calculated for every result.
 
-OpenSearch uses a probabilistic ranking framework called [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) to calculate relevance scores. Okapi BM25 is based on the original [TF/IDF](http://lucene.apache.org/core/{{site.lucene_version}}/core/org/apache/lucene/search/package-summary.html#scoring) framework used by Apache Lucene.
+OpenSearch uses a probabilistic ranking framework called [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) to calculate relevance scores. Okapi BM25 is based on the original [TF/IDF](https://lucene.apache.org/core/{{site.lucene_version}}/core/org/apache/lucene/search/package-summary.html#scoring) framework used by Apache Lucene.
 
 The explain API is an expensive operation in terms of both resources and time. On production clusters, we recommend using it sparingly for the purpose of troubleshooting.
 {: .warning }
diff --git a/_clients/OSC-dot-net.md b/_clients/OSC-dot-net.md
index abb10b2c..98a0b8e8 100644
--- a/_clients/OSC-dot-net.md
+++ b/_clients/OSC-dot-net.md
@@ -15,7 +15,7 @@ This getting started guide illustrates how to connect to OpenSearch, index docum
 
 ## Installing OpenSearch.Client
 
-To install OpenSearch.Client, download the [OpenSearch.Client NuGet package](https://www.nuget.org/packages/OpenSearch.Client) and add it to your project in an IDE of your choice. In Microsoft Visual Studio, follow the steps below: 
+To install OpenSearch.Client, download the [OpenSearch.Client NuGet package](https://www.nuget.org/packages/OpenSearch.Client/) and add it to your project in an IDE of your choice. In Microsoft Visual Studio, follow the steps below: 
 - In the **Solution Explorer** panel, right-click on your solution or project and select **Manage NuGet Packages for Solution**.
 - Search for the OpenSearch.Client NuGet package, and select **Install**.
 
diff --git a/_config.yml b/_config.yml
index 44e006f7..f99b1f39 100644
--- a/_config.yml
+++ b/_config.yml
@@ -5,10 +5,10 @@ baseurl: "/docs/latest" # the subpath of your site, e.g. /blog
 url: "https://opensearch.org" # the base hostname & protocol for your site, e.g. http://example.com
 permalink: /:path/
 
-opensearch_version: 2.6.0
-opensearch_dashboards_version: 2.6.0
-opensearch_major_minor_version: 2.6
-lucene_version: 9_5_0
+opensearch_version: '2.6.0'
+opensearch_dashboards_version: '2.6.0'
+opensearch_major_minor_version: '2.6'
+lucene_version: '9_5_0'
 
 # Build settings
 markdown: kramdown
diff --git a/_dashboards/reporting.md b/_dashboards/reporting.md
index ad502f8f..a6742129 100644
--- a/_dashboards/reporting.md
+++ b/_dashboards/reporting.md
@@ -54,7 +54,7 @@ This problem can occur for two reasons:
 
 - You don't have the correct version of `headless-chrome` to match the operating system on which OpenSearch Dashboards is running. Download the [correct version](https://github.com/opensearch-project/reporting/releases/tag/chromium-1.12.0.0).
 
-- You're missing additional dependencies. Install the required dependencies for your operating system from the [additional libraries](https://github.com/opensearch-project/dashboards-reports/blob/main/dashboards-reports/rendering-engine/headless-chrome/README.md#additional-libaries) section.
+- You're missing additional dependencies. Install the required dependencies for your operating system from the [additional libraries](https://github.com/opensearch-project/dashboards-reports/blob/1.x/dashboards-reports/rendering-engine/headless-chrome/README.md#additional-libaries) section.
 
 ### Characters not loading in reports
 
diff --git a/_data-prepper/common-use-cases/trace-analytics.md b/_data-prepper/common-use-cases/trace-analytics.md
index 32358f72..5cf24fad 100644
--- a/_data-prepper/common-use-cases/trace-analytics.md
+++ b/_data-prepper/common-use-cases/trace-analytics.md
@@ -39,7 +39,7 @@ The [OpenTelemetry source]({{site.url}}{{site.baseurl}}/data-prepper/pipelines/c
 There are three processors for the trace analytics feature:
 
 * *otel_trace_raw* - The *otel_trace_raw* processor receives a collection of [span](https://github.com/opensearch-project/data-prepper/blob/fa65e9efb3f8d6a404a1ab1875f21ce85e5c5a6d/data-prepper-api/src/main/java/org/opensearch/dataprepper/model/trace/Span.java) records from [*otel-trace-source*]({{site.url}}{{site.baseurl}}/data-prepper/pipelines/configuration/sources/otel-trace/), and performs stateful processing, extraction, and completion of trace-group-related fields.
-* *otel_trace_group* - The *otel_trace_group* processor fills in the missing trace-group-related fields in the collection of [span](https://github.com/opensearch-project/data-prepper/blob/fa65e9efb3f8d6a404a1ab1875f21ce85e5c5a6d/data-prepper-api/src/main/java/com/amazon/dataprepper/model/trace/Span.java) records by looking up the OpenSearch backend.
+* *otel_trace_group* - The *otel_trace_group* processor fills in the missing trace-group-related fields in the collection of [span](https://github.com/opensearch-project/data-prepper/blob/298e7931aa3b26130048ac3bde260e066857df54/data-prepper-api/src/main/java/org/opensearch/dataprepper/model/trace/Span.java) records by looking up the OpenSearch backend.
 * *service_map_stateful* – The *service_map_stateful* processor performs the required preprocessing for trace data and builds metadata to display the `service-map` dashboards.
 
 
diff --git a/_data-prepper/managing-data-prepper/configuring-log4j.md b/_data-prepper/managing-data-prepper/configuring-log4j.md
index a4b74a5c..175c754a 100644
--- a/_data-prepper/managing-data-prepper/configuring-log4j.md
+++ b/_data-prepper/managing-data-prepper/configuring-log4j.md
@@ -11,7 +11,7 @@ You can configure logging using Log4j in Data Prepper.
 
 ## Logging 
 
-Data Prepper uses [SLF4J](http://www.slf4j.org/) with a [Log4j 2 binding](http://logging.apache.org/log4j/2.x/log4j-slf4j-impl/). 
+Data Prepper uses [SLF4J](https://www.slf4j.org/) with a [Log4j 2 binding](https://logging.apache.org/log4j/2.x/log4j-slf4j-impl.html).
 
 For Data Prepper versions 2.0 and later, the Log4j 2 configuration file can be found and edited in `config/log4j2.properties` in the application's home directory. The default properties for Log4j 2 can be found in `log4j2-rolling.properties` in the *shared-config* directory.
 
diff --git a/_data-prepper/managing-data-prepper/monitoring.md b/_data-prepper/managing-data-prepper/monitoring.md
index fc070001..691f376b 100644
--- a/_data-prepper/managing-data-prepper/monitoring.md
+++ b/_data-prepper/managing-data-prepper/monitoring.md
@@ -11,11 +11,11 @@ You can monitor Data Prepper with metrics using [Micrometer](https://micrometer.
 
 ## JVM and system metrics
 
-JVM and system metrics are runtime metrics that are used to monitor Data Prepper instances. They include metrics for classloaders, memory, garbage collection, threads, and others. For more information, see [JVM and system metrics](https://micrometer.io/docs/ref/jvm). 
+JVM and system metrics are runtime metrics that are used to monitor Data Prepper instances. They include metrics for classloaders, memory, garbage collection, threads, and others. For more information, see [JVM and system metrics](https://micrometer.io/?/docs/ref/jvm).
 
 ### Naming
 
-JVM and system metrics follow predefined names in [Micrometer](https://micrometer.io/docs/concepts#_naming_meters). For example, the Micrometer metrics name for memory usage is `jvm.memory.used`. Micrometer changes the name to match the metrics system. Following the same example, `jvm.memory.used` is reported to Prometheus as `jvm_memory_used`, and is reported to Amazon CloudWatch as `jvm.memory.used.value`.
+JVM and system metrics follow predefined names in [Micrometer](https://micrometer.io/?/docs/concepts#_naming_meters). For example, the Micrometer metrics name for memory usage is `jvm.memory.used`. Micrometer changes the name to match the metrics system. Following the same example, `jvm.memory.used` is reported to Prometheus as `jvm_memory_used`, and is reported to Amazon CloudWatch as `jvm.memory.used.value`.
 
 ### Serving
 
diff --git a/_data-prepper/pipelines/configuration/sources/http-source.md b/_data-prepper/pipelines/configuration/sources/http-source.md
index 55de4d77..b41855cd 100644
--- a/_data-prepper/pipelines/configuration/sources/http-source.md
+++ b/_data-prepper/pipelines/configuration/sources/http-source.md
@@ -19,7 +19,7 @@ request_timeout | No | Integer | The request timeout, in milliseconds. Default v
 thread_count | No | Integer | The number of threads to keep in the ScheduledThreadPool. Default value is `200`.
 max_connection_count | No | Integer | The maximum allowed number of open connections. Default value is `500`.
 max_pending_requests | No | Integer | The maximum allowed number of tasks in the `ScheduledThreadPool` work queue. Default value is `1024`.
-authentication | No | Object | An authentication configuration. By default, this creates an unauthenticated server for the pipeline. This uses pluggable authentication for HTTPS. To use basic authentication define the `http_basic` plugin with a `username` and `password`. To provide customer authentication, use or create a plugin that implements [ArmeriaHttpAuthenticationProvider](https://github.com/opensearch-project/data-prepper/blob/main/data-prepper-plugins/armeria-common/src/main/java/com/amazon/dataprepper/armeria/authentication/ArmeriaHttpAuthenticationProvider.java).
+authentication | No | Object | An authentication configuration. By default, this creates an unauthenticated server for the pipeline. This uses pluggable authentication for HTTPS. To use basic authentication define the `http_basic` plugin with a `username` and `password`. To provide customer authentication, use or create a plugin that implements [ArmeriaHttpAuthenticationProvider](https://github.com/opensearch-project/data-prepper/blob/1.2.0/data-prepper-plugins/armeria-common/src/main/java/com/amazon/dataprepper/armeria/authentication/ArmeriaHttpAuthenticationProvider.java).
 ssl | No | Boolean | Enables TLS/SSL. Default value is false.
 ssl_certificate_file | Conditionally | String | SSL certificate chain file path or Amazon Simple Storage Service (Amazon S3) path. Amazon S3 path example `s3://<bucketName>/<path>`. Required if `ssl` is set to true and `use_acm_certificate_for_ssl` is set to false.
 ssl_key_file | Conditionally | String | SSL key file path or Amazon S3 path. Amazon S3 path example `s3://<bucketName>/<path>`. Required if `ssl` is set to true and `use_acm_certificate_for_ssl` is set to false.
diff --git a/_data-prepper/pipelines/configuration/sources/otel-metrics-source.md b/_data-prepper/pipelines/configuration/sources/otel-metrics-source.md
index 05f9e498..03019635 100644
--- a/_data-prepper/pipelines/configuration/sources/otel-metrics-source.md
+++ b/_data-prepper/pipelines/configuration/sources/otel-metrics-source.md
@@ -25,7 +25,7 @@ sslKeyFile | Conditionally | String | File-system path or Amazon S3 path to the
 useAcmCertForSSL | No | Boolean | Whether to enable TLS/SSL using a certificate and private key from AWS Certificate Manager (ACM). Default value is `false`.
 acmCertificateArn | Conditionally | String | Represents the ACM certificate ARN. ACM certificate take preference over S3 or local file system certificates. Required if `useAcmCertForSSL` is set to `true`.
 awsRegion | Conditionally | String | Represents the AWS Region used by ACM or Amazon S3. Required if `useAcmCertForSSL` is set to `true` or `sslKeyCertChainFile` and `sslKeyFile` is the Amazon S3 path.
-authentication | No | Object | An authentication configuration. By default, an unauthenticated server is created for the pipeline. This uses pluggable authentication for HTTPS. To use basic authentication, define the `http_basic` plugin with a `username` and `password`. To provide customer authentication, use or create a plugin that implements [GrpcAuthenticationProvider](https://github.com/opensearch-project/data-prepper/blob/main/data-prepper-plugins/armeria-common/src/main/java/com/amazon/dataprepper/armeria/authentication/GrpcAuthenticationProvider.java).
+authentication | No | Object | An authentication configuration. By default, an unauthenticated server is created for the pipeline. This uses pluggable authentication for HTTPS. To use basic authentication, define the `http_basic` plugin with a `username` and `password`. To provide customer authentication, use or create a plugin that implements [GrpcAuthenticationProvider](https://github.com/opensearch-project/data-prepper/blob/1.2.0/data-prepper-plugins/armeria-common/src/main/java/com/amazon/dataprepper/armeria/authentication/GrpcAuthenticationProvider.java).
 
 <!--- ## Configuration
 
diff --git a/_data-prepper/pipelines/configuration/sources/otel-trace.md b/_data-prepper/pipelines/configuration/sources/otel-trace.md
index 9b775ba1..c1294279 100644
--- a/_data-prepper/pipelines/configuration/sources/otel-trace.md
+++ b/_data-prepper/pipelines/configuration/sources/otel-trace.md
@@ -31,7 +31,7 @@ sslKeyFile | Conditionally | String | File system path or Amazon S3 path to the
 useAcmCertForSSL | No | Boolean | Whether to enable TLS/SSL using a certificate and private key from AWS Certificate Manager (ACM). Default value is `false`.
 acmCertificateArn | Conditionally | String | Represents the ACM certificate ARN. ACM certificate take preference over S3 or local file system certificate. Required if `useAcmCertForSSL` is set to `true`.
 awsRegion | Conditionally | String | Represents the AWS region used by ACM or Amazon S3. Required if `useAcmCertForSSL` is set to `true` or `sslKeyCertChainFile` and `sslKeyFile` are Amazon S3 paths.
-authentication | No | Object | An authentication configuration. By default, an unauthenticated server is created for the pipeline. This parameter uses pluggable authentication for HTTPS. To use basic authentication, define the `http_basic` plugin with a `username` and `password`. To provide customer authentication, use or create a plugin that implements [GrpcAuthenticationProvider](https://github.com/opensearch-project/data-prepper/blob/main/data-prepper-plugins/armeria-common/src/main/java/com/amazon/dataprepper/armeria/authentication/GrpcAuthenticationProvider.java).
+authentication | No | Object | An authentication configuration. By default, an unauthenticated server is created for the pipeline. This parameter uses pluggable authentication for HTTPS. To use basic authentication, define the `http_basic` plugin with a `username` and `password`. To provide customer authentication, use or create a plugin that implements [GrpcAuthenticationProvider](https://github.com/opensearch-project/data-prepper/blob/1.2.0/data-prepper-plugins/armeria-common/src/main/java/com/amazon/dataprepper/armeria/authentication/GrpcAuthenticationProvider.java).
 
 
 <!--- ## Configuration
diff --git a/_ml-commons-plugin/algorithms.md b/_ml-commons-plugin/algorithms.md
index c0a6ca35..9fe2ac5f 100644
--- a/_ml-commons-plugin/algorithms.md
+++ b/_ml-commons-plugin/algorithms.md
@@ -59,7 +59,7 @@ The training process supports multi-threads, but the number of threads should be
 
 ## Linear regression
 
-Linear regression maps the linear relationship between inputs and outputs. In ML Commons, the linear regression algorithm is adopted from the public machine learning library [Tribuo](https://tribuo.org/), which offers multidimensional linear regression models. The model supports the linear optimizer in training, including popular approaches like Linear Decay, SQRT_DECAY, [ADA](http://chrome-extension//gphandlahdpffmccakmbngmbjnjiiahp/https://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf), [ADAM](https://tribuo.org/learn/4.1/javadoc/org/tribuo/math/optimisers/Adam.html), and [RMS_DROP](https://tribuo.org/learn/4.1/javadoc/org/tribuo/math/optimisers/RMSProp.html). 
+Linear regression maps the linear relationship between inputs and outputs. In ML Commons, the linear regression algorithm is adopted from the public machine learning library [Tribuo](https://tribuo.org/), which offers multidimensional linear regression models. The model supports the linear optimizer in training, including popular approaches like Linear Decay, SQRT_DECAY, [ADA](https://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf), [ADAM](https://tribuo.org/learn/4.1/javadoc/org/tribuo/math/optimisers/Adam.html), and [RMS_DROP](https://tribuo.org/learn/4.1/javadoc/org/tribuo/math/optimisers/RMSProp.html). 
 
 ### Parameters
 
diff --git a/_observing-your-data/ad/index.md b/_observing-your-data/ad/index.md
index be4fabc5..3908d7c3 100644
--- a/_observing-your-data/ad/index.md
+++ b/_observing-your-data/ad/index.md
@@ -14,7 +14,7 @@ An anomaly in OpenSearch is any unusual behavior change in your time-series data
 
 It can be challenging to discover anomalies using conventional methods such as creating visualizations and dashboards. You could configure an alert based on a static threshold, but this requires prior domain knowledge and isn't adaptive to data that exhibits organic growth or seasonal behavior.
 
-Anomaly detection  automatically detects anomalies in your OpenSearch data in near real-time using the Random Cut Forest (RCF) algorithm. RCF is an unsupervised machine learning algorithm that models a sketch of your incoming data stream to compute an `anomaly grade` and `confidence score` value for each incoming data point. These values are used to differentiate an anomaly from normal variations. For more information about how RCF works, see [Random Cut Forests](https://api.semanticscholar.org/CorpusID:927435).
+Anomaly detection  automatically detects anomalies in your OpenSearch data in near real-time using the Random Cut Forest (RCF) algorithm. RCF is an unsupervised machine learning algorithm that models a sketch of your incoming data stream to compute an `anomaly grade` and `confidence score` value for each incoming data point. These values are used to differentiate an anomaly from normal variations. For more information about how RCF works, see [Random Cut Forests](https://www.semanticscholar.org/paper/Robust-Random-Cut-Forest-Based-Anomaly-Detection-on-Guha-Mishra/ecb365ef9b67cd5540cc4c53035a6a7bd88678f9).
 
 You can pair the anomaly detection plugin with the [alerting plugin]({{site.url}}{{site.baseurl}}/monitoring-plugins/alerting/) to notify you as soon as an anomaly is detected.
 
diff --git a/_plugins/link-checker.rb b/_plugins/link-checker.rb
index 3eebed64..0c7df2b1 100644
--- a/_plugins/link-checker.rb
+++ b/_plugins/link-checker.rb
@@ -1,219 +1,283 @@
 # frozen_string_literal: true
 
-require "jekyll/hooks"
-require "jekyll/document"
-require "json"
-require "set"
-require "uri"
-require "pathname"
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: BSD-3-Clause
+
+require 'net/http'
+require 'jekyll/hooks'
+require 'jekyll/document'
+require 'json'
+require 'set'
+require 'uri'
+require 'pathname'
+require 'typhoeus'
+require 'ruby-link-checker'
 
 ##
 # This singleton checks links during build to warn or fail upon finding dead links.
 #
-# `JEKYLL_CHECK_EXTERNAL_LINKS`, set on the environment, will cause verification of external links, irrespective of its
-# value. Usage: `JEKYLL_CHECK_EXTERNAL_LINKS= bundle exec jekyll build --trace`
+# `JEKYLL_LINK_CHECKER`, set on the environment, will cause verification of links.
+# Valid values: internal, forced, all.
+# Usage: `JEKYLL_LINK_CHECKER=internal bundle exec jekyll build --trace`
 #
-# `JEKYLL_FATAL_LINK_CHECKER`, set on the environment, will cause the build to fail if an internal dead link is found.
-# If set as `JEKYLL_FATAL_LINK_CHECKER=2`, the build will fail for internal and external dead links; in this case, there
-# is no need to set `JEKYLL_CHECK_EXTERNAL_LINKS`.
+# `JEKYLL_FATAL_LINK_CHECKER`, set on the environment, is the same as `JEKYLL_LINK_CHECKER`
+# except that it fails the build if there are broken links. It takes the same valid values.
+# Usage: `JEKYLL_FATAL_LINK_CHECKER=internal bundle exec jekyll build --trace`
 
 module Jekyll::LinkChecker
-
+  ##
   # The collection that will get stores as the output
-  @urls = {}
 
+  @urls
+
+  ##
   # Pattern to identify documents that should be excluded based on their URL
-  @excluded_paths = /(\.(css|js|json|map|xml|txt|yml)$|\/version-selector\.tpl$)/i.freeze
 
+  @excluded_paths = %r{(\.(css|js|json|map|xml|txt|yml)$|/version-selector\.tpl$)}i.freeze
+
+  ##
   # Pattern to identify certain HTML tags whose content should be excluded from indexing
+
   @href_matcher = /<a[^>]+href=(['"])(.+?)\1/im.freeze
 
+  ##
   # Pattern to check for external URLs
-  @external_matcher = /^https?:\/\//.freeze
 
+  @external_matcher = %r{^https?://}.freeze
+  @forced_external_matcher = %r{^https?://.*(?=opensearch\.org/)}.freeze
+
+  ##
   # List of domains to ignore
-  @ignored_domains = %w[localhost]
+  # playground.opensearch.org is causing an infinite redirect
+  # LinkedIn mostly fails with 999 status codes
+  @ignored_domains = [
+    'localhost',
+    'playground.opensearch.org', # infinite redirect, https://github.com/opensearch-project/dashboards-anywhere/issues/172
+    'crates.io', # 404s on bots
+    'www.cloudflare.com', # 403s on bots
+  ]
 
+  ##
   # Pattern of local paths to ignore
-  @ignored_paths = /(^\/javadocs\/)/.freeze
-
-  # Pattern to exclude when adding the `index.html` suffix to paths
-  @need_no_suffix = /\.(?!html)[^\/]+$/.freeze
-
-  # Valid response codes for successful links
-  @success_codes = %w[200 302]
-
-  # Questionable response codes for successful links
-  @questionable_codes = %w[301 403 429]
+  @ignored_paths = %r{(^/javadocs|^mailto:)}.freeze
 
+  ##
   # Holds the list of failures
-  @failures = []
+  @failures
 
-  # Driven by environment variables, it indicates a need to check external links
-  @check_external_links
+  ##
+  # Build flags driven by environment variables
+  @@LINK_CHECKER_STATES = %w[internal forced all]
+  @check_links                # Enables the link checker
+  @check_forced_external      # Enables checking internal links marked as external e.g. /docs
+  @check_external_links       # Enables checking external links
+  @should_build_fatally       # indicates the need to fail the build for dead links
 
-  # Driven by environment variables, it indicates the need to fail the build for dead links
-  @should_build_fatally
+  ##
+  # Defines the priority of the plugin
+  # The hooks are registered with a very low priority to make sure they run after any content modifying hook
+  def self.priority
+    10
+  end
 
+  ##
   # Initializes the singleton by recording the site
-  # return [void]
   def self.init(site)
     @site = site
     @urls = {}
     @failures = []
-    @base_url_matcher = /^#{@site.config["url"]}#{@site.baseurl}(\/.*)$/.freeze
+
+    begin
+      @should_build_fatally = true if ENV.key?('JEKYLL_FATAL_LINK_CHECKER')
+      check_flag = @should_build_fatally ? ENV['JEKYLL_FATAL_LINK_CHECKER'] : ENV['JEKYLL_LINK_CHECKER']
+
+      unless check_flag
+        return Jekyll.logger.info 'LinkChecker:', 'disabled. Enable with JEKYLL_LINK_CHECKER on the environment'
+      end
+
+      unless @@LINK_CHECKER_STATES.include?(check_flag)
+        Jekyll.logger.info "LinkChecker: [Notice] Could not initialize, Valid values for #{@should_build_fatally ? 'JEKYLL_FATAL_LINK_CHECKER' : 'JEKYLL_LINK_CHECKER'} are #{@@LINK_CHECKER_STATES}"
+        return
+      end
+
+      @external_link_checker = LinkChecker::Typhoeus::Hydra::Checker.new(
+        logger: Jekyll.logger,
+        hydra: { max_concurrency: 2 },
+        retries: 3
+      )
+
+      @external_link_checker.on :failure, :error do |result|
+        @failures << "#{result}, linked to in #{result.options[:location]}"
+      end
+
+      @check_links = true if @@LINK_CHECKER_STATES.include?(check_flag)
+      @check_forced_external = true if @@LINK_CHECKER_STATES[1..3].include?(check_flag)
+      @check_external_links = true if @@LINK_CHECKER_STATES[2..3].include?(check_flag)
+
+      msg = {
+        'internal' => 'internal links',
+        'forced' => 'internal and forced external links',
+        'all' => 'all links'
+      }
+
+      # Process a Page as soon as its content is ready
+      Jekyll::Hooks.register :pages, :post_convert, priority: priority do |page|
+        process(page)
+      end
+
+      # Process a Document as soon as its content is ready
+      Jekyll::Hooks.register :documents, :post_convert, priority: priority do |document|
+        process(document)
+      end
+
+      # Verify gathered links after Jekyll is done writing all its stuff
+      Jekyll::Hooks.register :site, :post_write, priority: priority do |site|
+        verify(site)
+      end
+
+      if @check_links
+        Jekyll.logger.info "LinkChecker: [Notice] Initialized successfully and will check #{msg[check_flag]}"
+      end
+      Jekyll.logger.info 'LinkChecker: [Notice] The build will fail if a dead link is found' if @should_build_fatally
+    rescue StandardError => e
+      Jekyll.logger.error 'LinkChecker: [Error] Failed to initialize Link Checker'
+      raise
+    end
   end
 
+  ##
   # Processes a Document or Page and adds the links to a collection
-  # It also checks for anchors that link to parts of the same page/doc
-  # return [void]
+  # It also records a failure for each same-page anchor (#fragment) with no matching id/name
+
   def self.process(page)
+    return unless @check_links
     return if @excluded_paths.match(page.path)
 
     hrefs = page.content.scan(@href_matcher)
     hrefs.each do |(_, href)|
       relative_path = page.path[0] == '/' ? Pathname.new(page.path).relative_path_from(Dir.getwd) : page.path
 
-      if href.start_with? '#'
-        @failures << "##{href[1..]}, linked in ./#{relative_path}" if (page.content =~ /<[a-z0-9-]+[^>]+id="#{href[1..]}"/i).nil?
-      else
-        match = @base_url_matcher.match(href)
-        unless match.nil?
-          href = match[1]
+      if href.eql? '#'
+        next # a bare "#" has no fragment to look up
+      elsif href.start_with? '#'
+        anchor = Regexp.escape(href[1..]) # match the fragment literally, not as a regex
+        if (page.content =~ /<[a-z0-9-]+[^>]+(?:id|name)="#{anchor}"/i).nil?
+          @failures << "##{href[1..]}, linked in ./#{relative_path}"
         end
+      else
         @urls[href] = Set[] unless @urls.key?(href)
         @urls[href] << relative_path
       end
     end
   end
 
-  # Verifies the validity of all the destinations gathered in @urls
-  # return [void]
-  def self.verify(site)
-    if ENV.key?('JEKYLL_CHECK_EXTERNAL_LINKS')
-      @check_external_links = true
-      puts "LinkChecker: [Notice] Will verify external links"
+  ##
+  # Verifies every destination gathered in @urls, then reports (or raises on) the dead links
+
+  def self.verify(_site)
+    return unless @check_links
+
+    @base_url_matcher = %r{^#{Regexp.escape(@site.config['url'].to_s)}#{Regexp.escape(@site.baseurl.to_s)}(/.*)$}.freeze
+
+    @urls.to_a.shuffle.each do |url, pages| # visit the gathered links in random order
+      location = "./#{pages.to_a.join(', ./')}"
+      @failures << "#{url}, linked to in #{location}" unless check(url, location)
     end
 
-    if ENV.key?('JEKYLL_FATAL_LINK_CHECKER')
-      @should_build_fatally = true
-      if ENV['JEKYLL_FATAL_LINK_CHECKER'] == '2'
-        @check_external_links = true
-        puts "LinkChecker: [Notice] The build will fail if any dead links are found"
+    @external_link_checker.run # NOTE(review): presumably performs the queued external checks - confirm
+
+    unless @failures.empty?
+      msg = "Found #{@failures.size} dead link#{@failures.size > 1 ? 's' : ''}:\n#{@failures.join("\n")}"
+    end
+
+    if !@failures.empty?
+      if @should_build_fatally
+        Jekyll.logger.error "\nLinkChecker: [Error] #{msg}\n".red
+        raise msg
       else
-        puts "LinkChecker: [Notice] The build will fail if a dead internal link is found"
+        Jekyll.logger.warn "\nLinkChecker: [Warning] #{msg}\n".red
       end
-    end
-
-    @urls.each do |url, pages|
-      @failures << "#{url}, linked to in ./#{pages.to_a.join(", ./")}" unless self.check(url)
-    end
-    
-    msg = "Found #{@failures.size} dead link#{@failures.size > 1 ? 's' : ''}:\n#{@failures.join("\n")}" unless @failures.empty?
-
-    if @should_build_fatally
-      raise msg
     else
-      puts "\nLinkChecker: [Warning] #{msg}\n"
+      Jekyll.logger.info "\nLinkChecker: [Success] No broken links!\n".green
     end
   end
 
-  # Check if an internal or external URL is accessible
-  # @param url [String] the url to check
-  # @return [Boolean]
-  def self.check(url)
+  ##
+  # Check if URL is accessible, dispatching it to the external or the internal checker
+
+  def self.check(url, location)
     match = @base_url_matcher.match(url)
-    unless match.nil?
-      url = match[1]
+    url = match[1] unless match.nil? # keep only the part captured by @base_url_matcher
+
+    url = @site.config['url'] + url if url.start_with? '/docs/' # re-prefix /docs/ paths with the site URL
+
+    if @forced_external_matcher =~ url
+      return true unless @check_forced_external # a skipped check counts as accessible
+
+      return check_external(url, location)
     end
 
     if @external_matcher =~ url
       return true unless @check_external_links
-      return self.check_external(url)
+
+      return check_external(url, location)
     end
 
-    return self.check_internal(url)
+    check_internal(url, location)
   end
 
-  # Check if an external URL is accessible by making a HEAD call
-  # @param url [String] the url to check
-  # @return [Boolean]
-  def self.check_external(url)
-    uri = URI(url)
-    return true if @ignored_domains.include? uri.host
+  ##
+  # Check an external URL by handing it to @external_link_checker, unless its host is ignored
 
-    (Net::HTTP.new uri.host, uri.port).tap do |http|
-      http.use_ssl = true
-    end.start do |http|
-      http.use_ssl = (uri.scheme == "https")
-
-      request = Net::HTTP::Get.new(uri)
-
-      http.request(request) do |response|
-        return true if @success_codes.include? response.code
-
-        puts "LinkChecker: [Warning] Got #{response.code} from #{url}"
-        return @questionable_codes.include? response.code
-      end
+  def self.check_external(url, location)
+    url = begin
+      URI(url)
+    rescue StandardError # an unparsable URL is passed on unchanged, as the original value
+      url
     end
+    return true if url.is_a?(URI) && @ignored_domains.include?(url.host)
+
+    @external_link_checker.check(url, { location: location })
   end
 
+  ##
   # Check if an internal link is accessible
-  # @param url [String] the url to check
-  # @return [Boolean]
-  def self.check_internal(url)
+
+  def self.check_internal(url, location)
+    Jekyll.logger.info "LinkChecker: [Info] Checking #{url}".cyan
     return true if @ignored_paths =~ url
 
     path, hash = url.split('#')
 
-    if @need_no_suffix =~ path
-        filename = File.join(@site.config["destination"], path)
-        return File.file?(filename)
-    else
-        unless path.end_with? 'index.html'
-          path << '/' unless path.end_with? '/'
-          path << 'index.html' unless path.end_with? 'index.html'
-        end
-
-        filename = File.join(@site.config["destination"], path)
-
-        return false unless File.file?(filename)
-
-        content = File.read(filename)
-        unless content.include? "<title>Redirecting"
-          return true if hash.nil? || hash.empty?
-          return !(content =~ /<[a-z0-9-]+[^>]+id="#{hash}"/i).nil?
-        end
-
-        match = content.match(@href_matcher)
-        if match.nil?
-          puts "LinkChecker: [Warning] Cannot check #{url} due to an unfollowable redirect"
-          return true
-        end
-
-        redirect = match[2]
-        redirect << '#' + hash unless hash.nil? || hash.empty?
-        return self.check(redirect)
+    unless path =~ %r{\.[^/]{2,}$} # no extension-like suffix: resolve to the directory's index.html
+      path << '/' unless path.end_with? '/'
+      path << 'index.html' unless path.end_with? 'index.html'
     end
+
+    filename = File.join(@site.config['destination'], path)
+
+    return false unless File.file?(filename)
+
+    content = File.read(filename)
+    unless content.include? '<title>Redirecting' # a regular page rather than a redirect page
+      return true if hash.nil? || hash.empty?
+
+      return !(content =~ /<[a-z0-9-]+[^>]+(?:id|name)="#{Regexp.escape(hash)}"/i).nil?
+    end
+
+    match = content.match(@href_matcher)
+    if match.nil?
+      Jekyll.logger.warn "LinkChecker: [Warning] Cannot check #{url} due to an unfollowable redirect"
+      return true
+    end
+
+    redirect = match[2] # follow the first link found in the redirect page
+    redirect << '#' + hash unless hash.nil? || hash.empty?
+    check(redirect, location)
   end
 end
 
 # Before any Document or Page is processed, initialize the LinkChecker
-Jekyll::Hooks.register :site, :pre_render do |site|
+Jekyll::Hooks.register :site, :pre_render, priority: Jekyll::LinkChecker.priority do |site|
   Jekyll::LinkChecker.init(site)
 end
-
-# Process a Page as soon as its content is ready
-Jekyll::Hooks.register :pages, :post_convert do |page|
-  Jekyll::LinkChecker.process(page)
-end
-
-# Process a Document as soon as its content is ready
-Jekyll::Hooks.register :documents, :post_convert do |document|
-  Jekyll::LinkChecker.process(document)
-end
-
-# Verify gathered links after Jekyll is done writing all its stuff
-Jekyll::Hooks.register :site, :post_write do |site|
-  Jekyll::LinkChecker.verify(site)
-end
\ No newline at end of file
diff --git a/_search-plugins/sql/cli.md b/_search-plugins/sql/cli.md
index a91c0ca0..553c88eb 100644
--- a/_search-plugins/sql/cli.md
+++ b/_search-plugins/sql/cli.md
@@ -58,7 +58,7 @@ You can configure the following connection properties:
 - `-u/-w`: Supports username and password for HTTP basic authentication, such as with the security plugin or fine-grained access control for Amazon OpenSearch Service.
 - `--aws-auth`: Turns on AWS sigV4 authentication to connect to an Amazon OpenSearch endpoint. Use with the AWS CLI (`aws configure`) to retrieve the local AWS configuration to authenticate and connect.
 
-For a list of all available configurations, see [clirc](https://github.com/opensearch-project/sql/blob/main/sql-cli/src/opensearch_sql_cli/conf/clirc).
+For a list of all available configurations, see [clirc](https://github.com/opensearch-project/sql/blob/1.x/sql-cli/src/opensearch_sql_cli/conf/clirc).
 
 ## Using the CLI
 
diff --git a/_search-plugins/sql/sql/odbc.md b/_search-plugins/sql/sql/odbc.md
index 35ba9012..52ba77ca 100644
--- a/_search-plugins/sql/sql/odbc.md
+++ b/_search-plugins/sql/sql/odbc.md
@@ -188,7 +188,7 @@ To list all the indices, click the search icon under **Table**.
 
 4. Start experimenting with data by dragging the table to the connection area. Choose **Update Now** or **Automatically Update** to populate the table data.
 
-See more detailed instructions in the [GitHub repository](https://github.com/opensearch-project/sql/blob/main/sql-odbc/docs/user/tableau_support.md).
+See more detailed instructions in the [GitHub repository](https://github.com/opensearch-project/sql/blob/1.x/sql-odbc/docs/user/tableau_support.md).
 
 ### Troubleshooting
 
diff --git a/_tools/k8s-operator.md b/_tools/k8s-operator.md
index 3f9f8512..5ed63079 100644
--- a/_tools/k8s-operator.md
+++ b/_tools/k8s-operator.md
@@ -19,7 +19,7 @@ There are two ways to get started with the operator:
 
 If you use Helm to manage your Kubernetes cluster, you can use the OpenSearch Kubernetes Operator's Cloud Native Computing Foundation (CNCF) project stored in Artifact Hub, a web-based application for finding, installing, and publishing CNCF packages. 
 
-To begin, log in to your Kubernetes cluster and add the Helm repository (repo) from [Artifact Hub](https://opster.github.io/opensearch-Kubernetes-operator/). 
+To begin, log in to your Kubernetes cluster and add the Helm repository (repo) from [Artifact Hub](https://artifacthub.io/packages/helm/opensearch-operator/opensearch-operator/). 
 
 ```
 helm repo add opensearch-operator https://opster.github.io/opensearch-k8s-operator/
diff --git a/_tools/logstash/index.md b/_tools/logstash/index.md
index deb44704..f3de0772 100644
--- a/_tools/logstash/index.md
+++ b/_tools/logstash/index.md
@@ -57,7 +57,7 @@ The OpenSearch Logstash plugin has two installation options at this time: Linux
 
 Make sure you have [Java Development Kit (JDK)](https://www.oracle.com/java/technologies/javase-downloads.html) version 8 or 11 installed.
 
-If you're migrating from an existing Logstash installation, you can install the [OpenSearch output plugin](https://rubygems.org/gems/logstash-output-opensearch/) manually and [update pipeline.conf](https://opensearch.org/docs/latest/clients/logstash/ship-to-opensearch/). We include this plugin by default in our tarball and Docker downloads.
+If you're migrating from an existing Logstash installation, you can install the [OpenSearch output plugin](https://rubygems.org/gems/logstash-output-opensearch/) manually and [update pipeline.conf](https://opensearch.org/docs/latest/tools/logstash/index/). We include this plugin by default in our tarball and Docker downloads.
 {: .note }
 
 ### Tarball
diff --git a/_tools/logstash/read-from-opensearch.md b/_tools/logstash/read-from-opensearch.md
index 6582048c..883a1e87 100644
--- a/_tools/logstash/read-from-opensearch.md
+++ b/_tools/logstash/read-from-opensearch.md
@@ -48,4 +48,4 @@ Like the output plugin, after adding your configuration to the `pipeline.conf` f
 
 Adding `stdout{}` to the `output{}` section of your `pipeline.conf` file prints the query results to the console. 
 
-To reindex the data into an OpenSearch domain, add the destination domain configuration in the `output{}` section like shown [here](https://opensearch.org/docs/latest/clients/logstash/ship-to-opensearch/#opensearch-output-plugin).
+To reindex the data into an OpenSearch domain, add the destination domain configuration in the `output{}` section as shown [here](https://opensearch.org/docs/latest/tools/logstash/index/).
diff --git a/build.sh b/build.sh
index 4806f144..060bbfa6 100755
--- a/build.sh
+++ b/build.sh
@@ -1 +1,3 @@
-bundle exec jekyll serve --host localhost --port 4000 --incremental --livereload --open-url
+#!/usr/bin/env bash
+
+JEKYLL_LINK_CHECKER=internal bundle exec jekyll serve --host localhost --port 4000 --incremental --livereload --open-url --trace
diff --git a/check-links.sh b/check-links.sh
old mode 100644
new mode 100755
index 0c4beb56..d5f459a6
--- a/check-links.sh
+++ b/check-links.sh
@@ -1,5 +1,3 @@
-# Checks for broken link in the documentation.
-# Run `bundle exec jekyll serve` first.
-# Uses https://github.com/stevenvachon/broken-link-checker
-# I have no idea why we have to exclude the ISM section, but that's the only way I can get this to run. - ae
-blc http://localhost:4000 -ro --exclude "*opensearch.org/*" --exclude "*github.com/opensearch-project/documentation-website/*" --exclude "*apache.org*" --exclude "https://localhost:5601/"
+#!/usr/bin/env bash
+
+JEKYLL_FATAL_LINK_CHECKER=all bundle exec jekyll build --future