Identify URLs and create links

If I have a text, for example:

Go to www.stackoverflow.com to answer your questions.

How do I identify the URL and create a link to it:

Go to <a href="www.stackoverflow.com">www.stackoverflow.com</a> to answer your questions.

Preferably using JavaScript or Java.

Author: bfavaretto, 2014-04-10

3 answers

To accomplish this task, the technique is the same as used for BBCode. You need to use regular expressions to identify possible URLs and replace them with populated anchors.

I created an expression to exemplify with javascript: http://rubular.com/r/btYgux8UTc

After creating the expression that isolates the possible URLs, use the function String.prototype.replace to replace each of them with an anchor, reusing the matched URL as the anchor's hypertext reference (href).

See an example:

var

    // Matches, in order: an optional "http://" or "https://" scheme, an optional
    // "www." prefix (optionally followed by one digit, e.g. "www2."), a "name.tld"
    // host, an optional trailing slash, an optional query string with at most two
    // parameters, and an optional "#fragment".
    reURL = /((?:http(s)?:\/\/)?(?:www(\d)?\.)?([\w\-]+\.\w{2,})\/?((?:\?(?:[\w\-]+(?:=[\w\-]+)?)?(?:&[\w\-]+(?:=[\w\-]+)?)?))?(#(?:[^\s]+)?)?)/g,

    text = 'Acesse o www.stackoverflow.com para tirar suas dúvidas.',

    // Builds the anchor for one matched URL. The href is the URL exactly as written,
    // with "http://" prepended only when no scheme is present, so a host without
    // "www." is not rewritten to a different host. Using a function instead of a
    // replacement template also keeps any "$" in the URL from being interpreted.
    // NOTE: the matched text is inserted as-is; HTML-escape untrusted input first.
    toAnchor = function (url) {
        var href = /^https?:\/\//.test(url) ? url : 'http://' + url;
        return '<a href="' + href + '">' + url + '</a>';
    },

    // Same content as `text`, with every matched URL wrapped in an anchor.
    html = text.replace(reURL, toAnchor);

In this case the variable html will contain the same value as text, but with the URLs replaced by anchors.

You can also wrap this in a function to make it reusable:

/**
 * Returns a copy of the string in which every URL-like substring is wrapped
 * in an <a> element.
 *
 * The href is the matched URL exactly as written (query string and #fragment
 * included), with "http://" prepended only when the URL has no scheme.
 * The matched text is inserted as-is; HTML-escape untrusted input first.
 *
 * @returns {string} The string with URLs replaced by anchors.
 */
String.prototype.URLToAnchors = function() {

    // Optional scheme, optional "www." / "wwwN." prefix, "name.tld" host, optional
    // trailing slash, optional query string (at most two parameters), optional fragment.
    var reURL = /((?:http(s)?:\/\/)?(?:www(\d)?\.)?([\w\-]+\.\w{2,})\/?((?:\?(?:[\w\-]+(?:=[\w\-]+)?)?(?:&[\w\-]+(?:=[\w\-]+)?)?))?(#(?:[^\s]+)?)?)/g;

    // A replacer function keeps the whole match in the href and avoids "$"
    // sequences in the URL being interpreted as replacement patterns.
    return String(this).replace(reURL, function (url) {
        var href = /^https?:\/\//.test(url) ? url : 'http://' + url;
        return '<a href="' + href + '">' + url + '</a>';
    });

};

Usage looks like this:

'Acesse o www.stackoverflow.com para tirar suas dúvidas.'.URLToAnchors();

The expression I created should handle a wide variety of URL formats, including query strings, fragments (#...), the secure protocol (https), etc. If anyone has an improvement in mind, please share the permalink.

 4
Author: Diego Lopes Lima, 2014-04-10 17:46:51

You can use a regular expression (regex) to search for matches in the string:

        // Sample text. The literal is split with "+" because a quoted string
        // cannot span several source lines.
        var str = "Lorem ipsum dolor www.stackoverflow.com sit amet, consectetur adipiscing elit. " +
                  "Morbi sit amet ultricies nunc";
        // Matches whitespace-free runs that contain a dot followed by at least
        // two more characters, with or without a leading "www.".
        var regexp = /(?:www\.|(?!www))[^\s\.]+\.[^\s]{2,}|www\.[^\s]+\.[^\s]{2,}/gi;
        // Array with every match found in str, or null when there is none.
        var matches_array = str.match(regexp);

Example js Fiddle

 1
Author: abfurlan, 2014-04-10 17:23:07

Here's a solution that also truncates very long URLs to improve how they are displayed:

// Pattern for a whole token that looks like a URL: an optional "http://" or
// "https://" scheme, a run of host characters containing a dot that is neither
// the first nor the last host character, and then at least one more character
// from the set allowed in URLs.
//
// The host part is written as "run, dot, one host character" rather than with
// nested repetitions of overlapping character classes, which would backtrack
// exponentially on long dotted tokens that end in a disallowed character.
// "%" is part of the allowed set so that percent-encoded URLs are accepted.
var expressao_regular_url = /^(?:https?:\/\/)?[\w.-]+\.[\w.-][\w\-.~:\/?#\[\]@!$&'()*+,;=%]+$/;

/**
 * Tells whether a token looks like a URL.
 *
 * @param {string} texto Token to check; it must be a URL from start to end.
 * @returns {boolean} true when the whole token matches the URL pattern.
 */
function isUrl(texto)
{
    return expressao_regular_url.test(texto);
}

/**
 * Shortens a long URL for display by keeping its beginning and its end and
 * placing "[...]" between them. URLs that would not get shorter are returned
 * unchanged.
 *
 * @param {string} url URL to shorten.
 * @returns {string} The URL itself, or its first 30 characters, "[...]" and
 *                   its last 15 characters.
 */
function urlTruncada(url)
{
    var marcador = '[...]';
    var tamanho_inicio = 30;
    var tamanho_fim = 15;
    var tamanho_maximo = tamanho_inicio + tamanho_fim + marcador.length;

    // Truncating only pays off when the result is shorter than the input.
    if (!(url.length > tamanho_maximo))
    {
        return url;
    }

    var inicio = url.substring(0, tamanho_inicio);
    var fim = url.substring(url.length - tamanho_fim);
    return [inicio, marcador, fim].join('');
}

/**
 * Turns every URL found in a text into a link that opens in a new window.
 *
 * The text is split into tokens at carriage returns, line feeds and spaces.
 * Each token accepted by isUrl() is wrapped in an <a> element whose href is
 * the token (prefixed with "http://" when it contains no "://") and whose
 * label is the token shortened by urlTruncada(). All other tokens and every
 * separator character are copied to the output unchanged, so leading
 * whitespace, repeated spaces and blank lines are preserved.
 *
 * Non-URL tokens are not HTML-escaped; escape untrusted text before calling.
 *
 * @param {string} texto Text to process; null or undefined yields ''.
 * @returns {string} The text with its URLs replaced by anchors.
 */
function autoUrl(texto)
{
    if (texto === null || texto === undefined)
    {
        return '';
    }

    var entrada = String(texto);
    // Char code 3 (ETX) stays a separator because earlier versions of this
    // function, which used it as an end-of-text sentinel, also split on it.
    var separadores = ['\r', '\n', ' ', String.fromCharCode(3)];
    var texto_saida = '';
    var token = '';

    // Returns the output for one token: an anchor for URLs, the token itself otherwise.
    function formatarToken(token_atual)
    {
        if (!token_atual || !isUrl(token_atual))
        {
            return token_atual;
        }
        var href = token_atual.indexOf('://') < 0 ? 'http://' + token_atual : token_atual;
        return '<a href="' + href + '" target="_blank">' + urlTruncada(token_atual) + '</a>';
    }

    for (var i = 0; i < entrada.length; i++)
    {
        var caractere = entrada.charAt(i);
        if (separadores.indexOf(caractere) >= 0)
        {
            // Flush the pending token, then always keep the separator itself.
            texto_saida += formatarToken(token) + caractere;
            token = '';
        }
        else
        {
            token += caractere;
        }
    }

    // The last token has no separator after it.
    return texto_saida + formatarToken(token);
}
 0
Author: Daniel, 2017-10-12 21:15:41