feat(aio): update metatags to control search engine crawling (#21665)

The `<meta name="robots" content="noindex">` tag is used
to indicate to search engine crawlers that they should not index
the current page. This is set dynamically by the document
viewer component to ensure that 404 and other erroring pages
are not added to the search index.

This relies upon the idea that the crawling bot will run the JS
and wait to see if this meta tag has been added or not.

Since we believe that the `googlebot` will do this, we also
pre-emptively add a hard-coded noindex tag specifically for
this bot, so that if anything else fails in bootstrapping the app,
the failed page will not be added to the index.

Closes #21317

PR Close #21665
This commit is contained in:
Pete Bacon Darwin 2018-01-19 14:58:23 +00:00 committed by Misko Hevery
parent 0b38a039d0
commit 88045a5050
5 changed files with 77 additions and 4 deletions

View File

@ -128,6 +128,20 @@ describe('site App', function() {
});
describe('404 page', () => {
it('should add or remove the "noindex" meta tag depending upon the validity of the page', () => {
  // Looks up the element afresh on every call, so each check reflects the current DOM.
  const hasMeta = (selector: string) => element(by.css(selector)).isPresent();
  // Neither crawler tag (with any content) may be present in the document.
  const expectCrawlerTagsAbsent = () => {
    expect(hasMeta('meta[name="googlebot"]')).toBeFalsy();
    expect(hasMeta('meta[name="robots"]')).toBeFalsy();
  };

  // Start on the root URL: no crawler tags expected.
  page.navigateTo('');
  expectCrawlerTagsAbsent();

  // A URL that does not exist must be flagged with "noindex" for both crawlers.
  page.navigateTo('does/not/exist');
  expect(hasMeta('meta[name="googlebot"][content="noindex"]')).toBeTruthy();
  expect(hasMeta('meta[name="robots"][content="noindex"]')).toBeTruthy();

  // Moving on via the top menu must clear the tags again.
  page.getTopMenuLink('features').click();
  expectCrawlerTagsAbsent();
});
it('should search the index for words found in the url', () => {
page.navigateTo('http/router');
const results = page.getSearchResults();

View File

@ -1,10 +1,11 @@
import { ComponentRef } from '@angular/core';
import { ComponentFixture, TestBed } from '@angular/core/testing';
import { Title } from '@angular/platform-browser';
import { Title, Meta } from '@angular/platform-browser';
import { Observable } from 'rxjs/Observable';
import { of } from 'rxjs/observable/of';
import { FILE_NOT_FOUND_ID, FETCHING_ERROR_ID } from 'app/documents/document.service';
import { EmbedComponentsService } from 'app/embed-components/embed-components.service';
import { Logger } from 'app/shared/logger.service';
import { TocService } from 'app/shared/toc.service';
@ -413,6 +414,24 @@ describe('DocViewerComponent', () => {
await doRender('Qux content');
expect(addTitleAndTocSpy).toHaveBeenCalledTimes(4);
});
it('should remove "noindex" meta tags if the document is valid', async () => {
  await doRender('foo', 'bar');

  // Both crawler-specific tags must have been removed via the injected `Meta` service.
  const meta = TestBed.get(Meta);
  ['googlebot', 'robots'].forEach(crawler =>
    expect(meta.removeTag).toHaveBeenCalledWith(`name="${crawler}"`));
});

it('should add "noindex" meta tags if the document is 404', async () => {
  await doRender('missing', FILE_NOT_FOUND_ID);

  // The "file not found" document id must mark the page as not indexable.
  const meta = TestBed.get(Meta);
  ['googlebot', 'robots'].forEach(crawler =>
    expect(meta.addTag).toHaveBeenCalledWith({ name: crawler, content: 'noindex' }));
});

it('should add "noindex" meta tags if the document fetching fails', async () => {
  await doRender('error', FETCHING_ERROR_ID);

  // The "fetching error" document id must mark the page as not indexable as well.
  const meta = TestBed.get(Meta);
  ['googlebot', 'robots'].forEach(crawler =>
    expect(meta.addTag).toHaveBeenCalledWith({ name: crawler, content: 'noindex' }));
});
});
describe('(embedding components)', () => {
@ -538,6 +557,8 @@ describe('DocViewerComponent', () => {
expect(logger.output.error).toEqual([
[`[DocViewer] Error preparing document 'foo': ${error.stack}`],
]);
expect(TestBed.get(Meta).addTag).toHaveBeenCalledWith({ name: 'googlebot', content: 'noindex' });
expect(TestBed.get(Meta).addTag).toHaveBeenCalledWith({ name: 'robots', content: 'noindex' });
});
it('when `EmbedComponentsService.embedInto()` fails', async () => {
@ -557,6 +578,8 @@ describe('DocViewerComponent', () => {
expect(logger.output.error).toEqual([
[`[DocViewer] Error preparing document 'bar': ${error.stack}`],
]);
expect(TestBed.get(Meta).addTag).toHaveBeenCalledWith({ name: 'googlebot', content: 'noindex' });
expect(TestBed.get(Meta).addTag).toHaveBeenCalledWith({ name: 'robots', content: 'noindex' });
});
it('when `destroyEmbeddedComponents()` fails', async () => {
@ -576,6 +599,8 @@ describe('DocViewerComponent', () => {
expect(logger.output.error).toEqual([
[`[DocViewer] Error preparing document 'baz': ${error.stack}`],
]);
expect(TestBed.get(Meta).addTag).toHaveBeenCalledWith({ name: 'googlebot', content: 'noindex' });
expect(TestBed.get(Meta).addTag).toHaveBeenCalledWith({ name: 'robots', content: 'noindex' });
});
it('when `swapViews()` fails', async () => {
@ -595,6 +620,8 @@ describe('DocViewerComponent', () => {
expect(logger.output.error).toEqual([
[`[DocViewer] Error preparing document 'qux': ${error.stack}`],
]);
expect(TestBed.get(Meta).addTag).toHaveBeenCalledWith({ name: 'googlebot', content: 'noindex' });
expect(TestBed.get(Meta).addTag).toHaveBeenCalledWith({ name: 'robots', content: 'noindex' });
});
it('when something fails with non-Error', async () => {
@ -611,6 +638,8 @@ describe('DocViewerComponent', () => {
expect(logger.output.error).toEqual([
[`[DocViewer] Error preparing document 'qux': ${error}`],
]);
expect(TestBed.get(Meta).addTag).toHaveBeenCalledWith({ name: 'googlebot', content: 'noindex' });
expect(TestBed.get(Meta).addTag).toHaveBeenCalledWith({ name: 'robots', content: 'noindex' });
});
});

View File

@ -1,5 +1,5 @@
import { Component, ComponentRef, DoCheck, ElementRef, EventEmitter, Input, OnDestroy, Output } from '@angular/core';
import { Title } from '@angular/platform-browser';
import { Title, Meta } from '@angular/platform-browser';
import { Observable } from 'rxjs/Observable';
import { of } from 'rxjs/observable/of';
@ -9,7 +9,7 @@ import 'rxjs/add/operator/do';
import 'rxjs/add/operator/switchMap';
import 'rxjs/add/operator/takeUntil';
import { DocumentContents } from 'app/documents/document.service';
import { DocumentContents, FILE_NOT_FOUND_ID, FETCHING_ERROR_ID } from 'app/documents/document.service';
import { EmbedComponentsService } from 'app/embed-components/embed-components.service';
import { Logger } from 'app/shared/logger.service';
import { TocService } from 'app/shared/toc.service';
@ -72,6 +72,7 @@ export class DocViewerComponent implements DoCheck, OnDestroy {
private embedComponentsService: EmbedComponentsService,
private logger: Logger,
private titleService: Title,
private metaService: Meta,
private tocService: TocService
) {
this.hostElement = elementRef.nativeElement;
@ -141,6 +142,8 @@ export class DocViewerComponent implements DoCheck, OnDestroy {
protected render(doc: DocumentContents): Observable<void> {
let addTitleAndToc: () => void;
this.setNoIndex(doc.id === FILE_NOT_FOUND_ID || doc.id === FETCHING_ERROR_ID);
return this.void$
// Security: `doc.contents` is always authored by the documentation team
// and is considered to be safe.
@ -156,10 +159,24 @@ export class DocViewerComponent implements DoCheck, OnDestroy {
const errorMessage = (err instanceof Error) ? err.stack : err;
this.logger.error(`[DocViewer] Error preparing document '${doc.id}': ${errorMessage}`);
this.nextViewContainer.innerHTML = '';
this.setNoIndex(true);
return this.void$;
});
}
/**
 * Add or remove the `noindex` meta tags for the `googlebot` and `robots` crawlers.
 *
 * @param val `true` to add the tags (asking crawlers not to index the page),
 *     `false` to remove them.
 */
private setNoIndex(val: boolean) {
  const crawlers = ['googlebot', 'robots'];

  if (!val) {
    // Indexable page: drop any crawler tag, matching on the `name` attribute only.
    crawlers.forEach(crawler => this.metaService.removeTag(`name="${crawler}"`));
    return;
  }

  crawlers.forEach(crawler => this.metaService.addTag({ name: crawler, content: 'noindex' }));
}
/**
* Swap the views, removing `currViewContainer` and inserting `nextViewContainer`.
* (At this point all content should be ready, including having loaded and instantiated embedded

View File

@ -31,6 +31,13 @@
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="translucent">
<!--
Initially tell the Google crawler not to index this page.
If the page loads correctly, the DocViewer will remove this tag.
Subsequent navigations will update the tag dynamically (i.e. soft 404).
Don't do the same for `robots` in general here, since they might not be able to handle the tag changing dynamically.
-->
<meta name="googlebot" content="noindex">
<!-- Google Analytics -->
<script>

View File

@ -1,5 +1,5 @@
import { Component, ComponentRef, NgModule, ViewChild } from '@angular/core';
import { Title } from '@angular/platform-browser';
import { Title, Meta } from '@angular/platform-browser';
import { Observable } from 'rxjs/Observable';
@ -51,6 +51,11 @@ export class MockTitle {
setTitle = jasmine.createSpy('Title#reset');
}
/**
 * Test double for the `Meta` service.
 * Each method is a bare jasmine spy, so specs can assert on the calls made to it.
 */
export class MockMeta {
// Spy standing in for `Meta#addTag`.
addTag = jasmine.createSpy('Meta#addTag');
// Spy standing in for `Meta#removeTag`.
removeTag = jasmine.createSpy('Meta#removeTag');
}
export class MockTocService {
genToc = jasmine.createSpy('TocService#genToc');
reset = jasmine.createSpy('TocService#reset');
@ -65,6 +70,7 @@ export class MockTocService {
{ provide: Logger, useClass: MockLogger },
{ provide: EmbedComponentsService, useClass: MockEmbedComponentsService },
{ provide: Title, useClass: MockTitle },
{ provide: Meta, useClass: MockMeta },
{ provide: TocService, useClass: MockTocService },
],
})