Home Identifier Source Test Repository

src/link-analyzer.es6

import _ from 'lodash';
import RoutePattern from 'route-pattern';
import Url from 'url';
import Checker from './checker';

/**
 * @class LinkAnalyzer
 * @desc Hold logic about recursive links processing
 */
export default class LinkAnalyzer {
    /**
     * Initialize predefined skip rules for prevent deeper crawling for given url
     * @param {String} initial url
     * @param {Object} options object
     * @constructor
     */
    constructor(initial, options) {
        /**
         * Parsed url object from Url.parse
         * @param  {Object} initial url - base (root) url where analyze started from
         * @return {Object} options object
         */
        this._url = Url.parse(initial);
        this._options = options;
    }

    /**
     * Returns options model
     * @returns {BasedOption}
     */
    get options() {
        return this._options;
    }

    /**
     * Returns initial url as parsed url string via Url module
     * @returns {Object}
     */
    get url() {
        return this._url;
    }

    /**
     * Returns true if anyone of skip conditions returns true
     * @param {String} url url of link
     * @param {String} baseUrl url of page where link is
     * @returns {boolean} result flag
     * @public
     */
    isNeedToSkipUrl(url, baseUrl) {
        return [
            '_skipNonAcceptableProtocols',
            '_skipExternalUrls',
            '_skipExcludedUrls',
            '_skipOnMode'
        ].some(fName => {
            return this[fName](url, baseUrl);
        });
    }

    /**
     * Checks if given url is external
     * @param  {String}  url request url
     * @return {boolean} true if url is external. false otherwise
     * @public
     */
    isExternal(url) {
        return url.indexOf(this._url['hostname']) < 0;
    }

    /**
     * Check if protocol of given url satisfies acceptedSchemes criteria
     * @param {String} url request url
     * @returns {boolean} result flag
     * @private
     */
    _skipNonAcceptableProtocols(url) {
        return this._options.getOption('acceptedSchemes').indexOf(Url.parse(url).protocol) < 0;
    }

    /**
     * Checks if given url has the different hostname then initial
     * (If 'checkExternalUrls'rule is set to true)
     * @param {String} url request url
     * @returns {boolean} result flag
     * @private
     */
    _skipExternalUrls(url) {
        return !this._options.getOption('checkExternalUrls') && url.indexOf(this._url['hostname']) < 0;
    }

    /**
     * Checks if given url has host different then host of initial url
     * @param {String} url request
     * @returns {boolean} result flag
     * @private
     */
    _skipExcludedUrls(url) {
        return this._options.getOption('excludeLinkPatterns').some(pattern => {
            return _.isRegExp(pattern) ? pattern.test(url) : RoutePattern.fromString(pattern).matches(url);
        });
    }

    /**
     * Checks if given url is need to be check depending on mode configuration option
     * @type {boolean} result flag
     * @private
     */
    _skipOnMode(url, baseUrl) {
        var mode = this._options.getOption('mode'),
            baseUrlParsed = Url.parse(baseUrl);

        const MODES = Checker.CONSTANTS.MODE;
        if (mode === MODES.PAGE) {
            return baseUrlParsed.path !== this._url.path;
        } else if (mode === MODES.SECTION) {
            return baseUrlParsed.path.indexOf(this._url.path) === -1;
        } else {
            return false;
        }
    }
}