From 680c821937d1a9b9b2330b974ad6c62ce9a3d169 Mon Sep 17 00:00:00 2001 From: "Daniel (dB.) Doubrovkine" Date: Tue, 4 Apr 2023 15:49:27 -0400 Subject: [PATCH] Add CI with link checker. (#3584) * Add CI with link checker. Signed-off-by: dblock * Capture URI::InvalidURIError. Signed-off-by: dblock * Use HEAD and catch URI errors. Signed-off-by: dblock * Retry on a 405 with a GET. Signed-off-by: dblock * Replaced external link checker with ruby-link-checker. Signed-off-by: dblock * Don't exit with an exception. Signed-off-by: dblock * Run internal link checker on build/ci. Signed-off-by: dblock * Added broken links issue template. Signed-off-by: dblock * Added host exclusions that 404 or fail on bots. Signed-off-by: dblock * Raise anyway because Jekyll does it for us. Signed-off-by: dblock * Fix broken links. Signed-off-by: dblock * Only run link checker on main. Signed-off-by: dblock * Re-add check-links.sh. Signed-off-by: dblock * Run once a day on cron. Signed-off-by: dblock --------- Signed-off-by: dblock --- .github/ISSUE_TEMPLATE/broken_links.md | 7 + .github/workflows/jekyll-build.yml | 16 + .github/workflows/link-checker.yml | 25 ++ Gemfile | 6 +- _api-reference/explain.md | 2 +- _clients/OSC-dot-net.md | 2 +- _config.yml | 8 +- _dashboards/reporting.md | 2 +- .../common-use-cases/trace-analytics.md | 2 +- .../configuring-log4j.md | 2 +- .../managing-data-prepper/monitoring.md | 4 +- .../configuration/sources/http-source.md | 2 +- .../sources/otel-metrics-source.md | 2 +- .../configuration/sources/otel-trace.md | 2 +- _ml-commons-plugin/algorithms.md | 2 +- _observing-your-data/ad/index.md | 2 +- _plugins/link-checker.rb | 340 +++++++++++------- _search-plugins/sql/cli.md | 2 +- _search-plugins/sql/sql/odbc.md | 2 +- _tools/k8s-operator.md | 2 +- _tools/logstash/index.md | 2 +- _tools/logstash/read-from-opensearch.md | 2 +- build.sh | 4 +- check-links.sh | 8 +- 24 files changed, 282 insertions(+), 166 deletions(-) create mode 100644 
.github/ISSUE_TEMPLATE/broken_links.md create mode 100644 .github/workflows/jekyll-build.yml create mode 100644 .github/workflows/link-checker.yml mode change 100644 => 100755 check-links.sh diff --git a/.github/ISSUE_TEMPLATE/broken_links.md b/.github/ISSUE_TEMPLATE/broken_links.md new file mode 100644 index 00000000..f9d38758 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/broken_links.md @@ -0,0 +1,7 @@ +--- +title: '[AUTOCUT] Broken links' +labels: 'bug' +--- + +Links checker has failed on push of your commit. +Please examine the workflow log {{ env.WORKFLOW_URL }}. diff --git a/.github/workflows/jekyll-build.yml b/.github/workflows/jekyll-build.yml new file mode 100644 index 00000000..70130d48 --- /dev/null +++ b/.github/workflows/jekyll-build.yml @@ -0,0 +1,16 @@ +name: Jekyll Build Verification + +on: [pull_request] + +jobs: + check: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: '3.0' + bundler-cache: true + - run: | + JEKYLL_LINK_CHECKER=internal bundle exec jekyll build --future diff --git a/.github/workflows/link-checker.yml b/.github/workflows/link-checker.yml new file mode 100644 index 00000000..2cd3ca7c --- /dev/null +++ b/.github/workflows/link-checker.yml @@ -0,0 +1,25 @@ +name: Check Links +on: + workflow_dispatch: + schedule: + - cron: "30 11 * * *" +jobs: + check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: '3.0' + bundler-cache: true + - run: | + JEKYLL_FATAL_LINK_CHECKER=all bundle exec jekyll build --future + - name: Create Issue On Build Failure + if: ${{ failure() }} + uses: dblock/create-a-github-issue@v3 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + WORKFLOW_URL: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" + with: + update_existing: true + filename: .github/ISSUE_TEMPLATE/broken_links.md diff --git a/Gemfile b/Gemfile index ef90cc0c..cd42fda5 100644 --- 
a/Gemfile +++ b/Gemfile @@ -32,4 +32,8 @@ gem "tzinfo-data", platforms: [:mingw, :mswin, :x64_mingw, :jruby] gem "wdm", "~> 0.1.0" if Gem.win_platform? # Installs webrick dependency for building locally -gem "webrick", "~> 1.7" \ No newline at end of file +gem "webrick", "~> 1.7" + +# Link checker +gem "typhoeus" +gem "ruby-link-checker" \ No newline at end of file diff --git a/_api-reference/explain.md b/_api-reference/explain.md index 2d79baef..f7b2d6f7 100644 --- a/_api-reference/explain.md +++ b/_api-reference/explain.md @@ -10,7 +10,7 @@ Introduced 1.0 Wondering why a specific document ranks higher (or lower) for a query? You can use the explain API for an explanation of how the relevance score (`_score`) is calculated for every result. -OpenSearch uses a probabilistic ranking framework called [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) to calculate relevance scores. Okapi BM25 is based on the original [TF/IDF](http://lucene.apache.org/core/{{site.lucene_version}}/core/org/apache/lucene/search/package-summary.html#scoring) framework used by Apache Lucene. +OpenSearch uses a probabilistic ranking framework called [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) to calculate relevance scores. Okapi BM25 is based on the original [TF/IDF](https://lucene.apache.org/core/{{site.lucene_version}}/core/org/apache/lucene/search/package-summary.html#scoring) framework used by Apache Lucene. The explain API is an expensive operation in terms of both resources and time. On production clusters, we recommend using it sparingly for the purpose of troubleshooting. 
{: .warning } diff --git a/_clients/OSC-dot-net.md b/_clients/OSC-dot-net.md index abb10b2c..98a0b8e8 100644 --- a/_clients/OSC-dot-net.md +++ b/_clients/OSC-dot-net.md @@ -15,7 +15,7 @@ This getting started guide illustrates how to connect to OpenSearch, index docum ## Installing OpenSearch.Client -To install OpenSearch.Client, download the [OpenSearch.Client NuGet package](https://www.nuget.org/packages/OpenSearch.Client) and add it to your project in an IDE of your choice. In Microsoft Visual Studio, follow the steps below: +To install OpenSearch.Client, download the [OpenSearch.Client NuGet package](https://www.nuget.org/packages/OpenSearch.Client/) and add it to your project in an IDE of your choice. In Microsoft Visual Studio, follow the steps below: - In the **Solution Explorer** panel, right-click on your solution or project and select **Manage NuGet Packages for Solution**. - Search for the OpenSearch.Client NuGet package, and select **Install**. diff --git a/_config.yml b/_config.yml index 44e006f7..f99b1f39 100644 --- a/_config.yml +++ b/_config.yml @@ -5,10 +5,10 @@ baseurl: "/docs/latest" # the subpath of your site, e.g. /blog url: "https://opensearch.org" # the base hostname & protocol for your site, e.g. http://example.com permalink: /:path/ -opensearch_version: 2.6.0 -opensearch_dashboards_version: 2.6.0 -opensearch_major_minor_version: 2.6 -lucene_version: 9_5_0 +opensearch_version: '2.6.0' +opensearch_dashboards_version: '2.6.0' +opensearch_major_minor_version: '2.6' +lucene_version: '9_5_0' # Build settings markdown: kramdown diff --git a/_dashboards/reporting.md b/_dashboards/reporting.md index ad502f8f..a6742129 100644 --- a/_dashboards/reporting.md +++ b/_dashboards/reporting.md @@ -54,7 +54,7 @@ This problem can occur for two reasons: - You don't have the correct version of `headless-chrome` to match the operating system on which OpenSearch Dashboards is running. 
Download the [correct version](https://github.com/opensearch-project/reporting/releases/tag/chromium-1.12.0.0). -- You're missing additional dependencies. Install the required dependencies for your operating system from the [additional libraries](https://github.com/opensearch-project/dashboards-reports/blob/main/dashboards-reports/rendering-engine/headless-chrome/README.md#additional-libaries) section. +- You're missing additional dependencies. Install the required dependencies for your operating system from the [additional libraries](https://github.com/opensearch-project/dashboards-reports/blob/1.x/dashboards-reports/rendering-engine/headless-chrome/README.md#additional-libaries) section. ### Characters not loading in reports diff --git a/_data-prepper/common-use-cases/trace-analytics.md b/_data-prepper/common-use-cases/trace-analytics.md index 32358f72..5cf24fad 100644 --- a/_data-prepper/common-use-cases/trace-analytics.md +++ b/_data-prepper/common-use-cases/trace-analytics.md @@ -39,7 +39,7 @@ The [OpenTelemetry source]({{site.url}}{{site.baseurl}}/data-prepper/pipelines/c There are three processors for the trace analytics feature: * *otel_trace_raw* - The *otel_trace_raw* processor receives a collection of [span](https://github.com/opensearch-project/data-prepper/blob/fa65e9efb3f8d6a404a1ab1875f21ce85e5c5a6d/data-prepper-api/src/main/java/org/opensearch/dataprepper/model/trace/Span.java) records from [*otel-trace-source*]({{site.url}}{{site.baseurl}}/data-prepper/pipelines/configuration/sources/otel-trace/), and performs stateful processing, extraction, and completion of trace-group-related fields. -* *otel_trace_group* - The *otel_trace_group* processor fills in the missing trace-group-related fields in the collection of [span](https://github.com/opensearch-project/data-prepper/blob/fa65e9efb3f8d6a404a1ab1875f21ce85e5c5a6d/data-prepper-api/src/main/java/com/amazon/dataprepper/model/trace/Span.java) records by looking up the OpenSearch backend. 
+* *otel_trace_group* - The *otel_trace_group* processor fills in the missing trace-group-related fields in the collection of [span](https://github.com/opensearch-project/data-prepper/blob/298e7931aa3b26130048ac3bde260e066857df54/data-prepper-api/src/main/java/org/opensearch/dataprepper/model/trace/Span.java) records by looking up the OpenSearch backend. * *service_map_stateful* – The *service_map_stateful* processor performs the required preprocessing for trace data and builds metadata to display the `service-map` dashboards. diff --git a/_data-prepper/managing-data-prepper/configuring-log4j.md b/_data-prepper/managing-data-prepper/configuring-log4j.md index a4b74a5c..175c754a 100644 --- a/_data-prepper/managing-data-prepper/configuring-log4j.md +++ b/_data-prepper/managing-data-prepper/configuring-log4j.md @@ -11,7 +11,7 @@ You can configure logging using Log4j in Data Prepper. ## Logging -Data Prepper uses [SLF4J](http://www.slf4j.org/) with a [Log4j 2 binding](http://logging.apache.org/log4j/2.x/log4j-slf4j-impl/). +Data Prepper uses [SLF4J](https://www.slf4j.org/) with a [Log4j 2 binding](https://logging.apache.org/log4j/2.x/log4j-slf4j-impl.html). For Data Prepper versions 2.0 and later, the Log4j 2 configuration file can be found and edited in `config/log4j2.properties` in the application's home directory. The default properties for Log4j 2 can be found in `log4j2-rolling.properties` in the *shared-config* directory. diff --git a/_data-prepper/managing-data-prepper/monitoring.md b/_data-prepper/managing-data-prepper/monitoring.md index fc070001..691f376b 100644 --- a/_data-prepper/managing-data-prepper/monitoring.md +++ b/_data-prepper/managing-data-prepper/monitoring.md @@ -11,11 +11,11 @@ You can monitor Data Prepper with metrics using [Micrometer](https://micrometer. ## JVM and system metrics -JVM and system metrics are runtime metrics that are used to monitor Data Prepper instances. 
They include metrics for classloaders, memory, garbage collection, threads, and others. For more information, see [JVM and system metrics](https://micrometer.io/docs/ref/jvm). +JVM and system metrics are runtime metrics that are used to monitor Data Prepper instances. They include metrics for classloaders, memory, garbage collection, threads, and others. For more information, see [JVM and system metrics](https://micrometer.io/?/docs/ref/jvm). ### Naming -JVM and system metrics follow predefined names in [Micrometer](https://micrometer.io/docs/concepts#_naming_meters). For example, the Micrometer metrics name for memory usage is `jvm.memory.used`. Micrometer changes the name to match the metrics system. Following the same example, `jvm.memory.used` is reported to Prometheus as `jvm_memory_used`, and is reported to Amazon CloudWatch as `jvm.memory.used.value`. +JVM and system metrics follow predefined names in [Micrometer](https://micrometer.io/?/docs/concepts#_naming_meters). For example, the Micrometer metrics name for memory usage is `jvm.memory.used`. Micrometer changes the name to match the metrics system. Following the same example, `jvm.memory.used` is reported to Prometheus as `jvm_memory_used`, and is reported to Amazon CloudWatch as `jvm.memory.used.value`. ### Serving diff --git a/_data-prepper/pipelines/configuration/sources/http-source.md b/_data-prepper/pipelines/configuration/sources/http-source.md index 55de4d77..b41855cd 100644 --- a/_data-prepper/pipelines/configuration/sources/http-source.md +++ b/_data-prepper/pipelines/configuration/sources/http-source.md @@ -19,7 +19,7 @@ request_timeout | No | Integer | The request timeout, in milliseconds. Default v thread_count | No | Integer | The number of threads to keep in the ScheduledThreadPool. Default value is `200`. max_connection_count | No | Integer | The maximum allowed number of open connections. Default value is `500`. 
max_pending_requests | No | Integer | The maximum allowed number of tasks in the `ScheduledThreadPool` work queue. Default value is `1024`. -authentication | No | Object | An authentication configuration. By default, this creates an unauthenticated server for the pipeline. This uses pluggable authentication for HTTPS. To use basic authentication define the `http_basic` plugin with a `username` and `password`. To provide customer authentication, use or create a plugin that implements [ArmeriaHttpAuthenticationProvider](https://github.com/opensearch-project/data-prepper/blob/main/data-prepper-plugins/armeria-common/src/main/java/com/amazon/dataprepper/armeria/authentication/ArmeriaHttpAuthenticationProvider.java). +authentication | No | Object | An authentication configuration. By default, this creates an unauthenticated server for the pipeline. This uses pluggable authentication for HTTPS. To use basic authentication define the `http_basic` plugin with a `username` and `password`. To provide custom authentication, use or create a plugin that implements [ArmeriaHttpAuthenticationProvider](https://github.com/opensearch-project/data-prepper/blob/1.2.0/data-prepper-plugins/armeria-common/src/main/java/com/amazon/dataprepper/armeria/authentication/ArmeriaHttpAuthenticationProvider.java). ssl | No | Boolean | Enables TLS/SSL. Default value is false. ssl_certificate_file | Conditionally | String | SSL certificate chain file path or Amazon Simple Storage Service (Amazon S3) path. Amazon S3 path example `s3:///`. Required if `ssl` is set to true and `use_acm_certificate_for_ssl` is set to false. ssl_key_file | Conditionally | String | SSL key file path or Amazon S3 path. Amazon S3 path example `s3:///`. Required if `ssl` is set to true and `use_acm_certificate_for_ssl` is set to false.
diff --git a/_data-prepper/pipelines/configuration/sources/otel-metrics-source.md b/_data-prepper/pipelines/configuration/sources/otel-metrics-source.md index 05f9e498..03019635 100644 --- a/_data-prepper/pipelines/configuration/sources/otel-metrics-source.md +++ b/_data-prepper/pipelines/configuration/sources/otel-metrics-source.md @@ -25,7 +25,7 @@ sslKeyFile | Conditionally | String | File-system path or Amazon S3 path to the useAcmCertForSSL | No | Boolean | Whether to enable TLS/SSL using a certificate and private key from AWS Certificate Manager (ACM). Default value is `false`. acmCertificateArn | Conditionally | String | Represents the ACM certificate ARN. ACM certificate take preference over S3 or local file system certificates. Required if `useAcmCertForSSL` is set to `true`. awsRegion | Conditionally | String | Represents the AWS Region used by ACM or Amazon S3. Required if `useAcmCertForSSL` is set to `true` or `sslKeyCertChainFile` and `sslKeyFile` is the Amazon S3 path. -authentication | No | Object | An authentication configuration. By default, an unauthenticated server is created for the pipeline. This uses pluggable authentication for HTTPS. To use basic authentication, define the `http_basic` plugin with a `username` and `password`. To provide customer authentication, use or create a plugin that implements [GrpcAuthenticationProvider](https://github.com/opensearch-project/data-prepper/blob/main/data-prepper-plugins/armeria-common/src/main/java/com/amazon/dataprepper/armeria/authentication/GrpcAuthenticationProvider.java). +authentication | No | Object | An authentication configuration. By default, an unauthenticated server is created for the pipeline. This uses pluggable authentication for HTTPS. To use basic authentication, define the `http_basic` plugin with a `username` and `password`. 
To provide custom authentication, use or create a plugin that implements [GrpcAuthenticationProvider](https://github.com/opensearch-project/data-prepper/blob/1.2.0/data-prepper-plugins/armeria-common/src/main/java/com/amazon/dataprepper/armeria/authentication/GrpcAuthenticationProvider.java).