Source code

Revision control

Copy as Markdown

Other Tools

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- Fixture page: every #testN block in the body is one domain-extraction scenario. -->
<title>Domain extraction test cases</title>
</head>
<body>
<div id="results">
<div id="test1">
<!-- One organic-layout container with a single anchor whose href is an absolute https URL. -->
<div data-layout="organic">
<a href="https://foobar.com" data-testid="result-title-a">Extract domain from href (absolute URL).</a>
</div>
</div>
<div id="test2">
<!-- One organic-layout container with four anchors, each an absolute href to a different host (foo, bar, baz, qux). -->
<div data-layout="organic">
<a href="https://foo.com" data-testid="result-title-a">Extract domain from href (absolute URL) - link1.</a>
<a href="https://bar.com" data-testid="result-title-a">Extract domain from href (absolute URL) - link2.</a>
<a href="https://baz.com" data-testid="result-title-a">Extract domain from href (absolute URL) - link3.</a>
<a href="https://qux.com" data-testid="result-title-a">Extract domain from href (absolute URL) - link4.</a>
</div>
</div>
<div id="test3">
<!-- Mixes a relative href (/dummy-page) with two absolute hrefs to the same path on example.org, one without and one with the www prefix. -->
<div data-layout="organic">
<a href="/dummy-page" data-testid="result-title-a">Extract domain from href (relative URL).</a>
<a href="https://example.org/dummy-page" data-testid="result-title-a">Extract domain from href.</a>
<a href="https://www.example.org/dummy-page" data-testid="result-title-a">Extract domain from href.</a>
</div>
</div>
<div id="test4">
<!-- Single anchor whose href is only "#"; the domain is carried in the data-dtld attribute. -->
<a href="#" data-dtld="www.abc.com">Extract domain from data attribute.</a>
</div>
<div id="test5">
<!-- Four anchors with href "#", each carrying a different www-prefixed host in data-dtld. -->
<a href="#" data-dtld="www.foo.com">Extract domain from data attribute - link1.</a>
<a href="#" data-dtld="www.bar.com">Extract domain from data attribute - link2.</a>
<a href="#" data-dtld="www.baz.com">Extract domain from data attribute - link3.</a>
<a href="#" data-dtld="www.qux.com">Extract domain from data attribute - link4.</a>
</div>
<div id="test6">
<!-- Three js-carousel-item-title anchors whose hrefs carry a bare host in the ad_domain query parameter: two absolute hrefs (with and without www) and one relative href. -->
<a href="https://www.example.org/testing?ad_domain=def.com" class="js-carousel-item-title">Extract domain from an href's query param value.</a>
<a href="https://example.org/testing?ad_domain=bar.com" class="js-carousel-item-title">Extract domain from an href's query param value.</a>
<a href="/testing?ad_domain=baz.com" class="js-carousel-item-title">Extract domain from a relative href containing a relevant query param value.</a>
</div>
<div id="test7">
<!-- The ad_domain value is itself an absolute URL with a scheme and a path. The anchor has no class or data attribute. -->
<a href="https://example.com/test?ad_domain=https://def.com/path/to/nowhere">Extract domain from an href's query param value containing an absolute href.</a>
</div>
<div id="test8">
<!-- The ad_domain value is a host followed by a path, with no scheme. -->
<a href="https://example.com/test?ad_domain=def.com/path/to/nowhere">Extract domain from an href's query param value containing a relative href.</a>
</div>
<div id="test9">
<!-- The href has a query string, but its only key is dummy_key; there is no ad_domain parameter. -->
<a href="https://example.com/test?dummy_key=foo.com">Param value is missing from the href.</a>
</div>
<div id="test10">
<!-- Extraction preserves order of domains within the page. -->
<!-- Three anchors in document order: absolute href, data-dtld attribute, then ad_domain query parameter. The third href has no scheme, so it is a relative reference. -->
<div data-layout="organic">
<a href="https://foobar.com" data-testid="result-title-a">Extract domain from href (absolute URL).</a>
<a href="#" data-dtld="www.abc.com">Extract domain from data attribute.</a>
<a href="example.com/testing?ad_domain=def.com" class="js-carousel-item-title">Extract domain from an href's query param value.</a>
</div>
</div>
<div id="test11">
<!-- Plain anchor with no data-testid, data-dtld, or class attribute; its href has no scheme. -->
<a href="nomatches.com">Link that doesn't match a selector.</a>
</div>
<div id="test12">
<!-- The data-dtld attribute exists but holds an empty string. -->
<a href="#" data-dtld="">Data attribute is present, but value is missing.</a>
</div>
<div id="test13">
<!-- The ad_domain key is present in the query string with an empty value; the href has no scheme. -->
<a href="example.com/testing?ad_domain=" class="js-carousel-item-title">Query param is present, but value is missing.</a>
</div>
<div id="test14">
<!-- Absolute href using the git: scheme rather than http or https. -->
<a href="git://testing.com/testrepo">Non-standard URL scheme.</a>
</div>
<div id="test15">
<!-- Four hosts registered under two-label suffixes (gc.ca, gov.uk, co.uk, co.il); three carry a www prefix and one does not. -->
<h5>Second-level domains to a top-level domain.</h5>
<a href="https://www.foobar.gc.ca/">Link</a>
<a href="https://www.foobar.gov.uk/">Link</a>
<a href="https://foobar.co.uk">Link</a>
<a href="https://www.foobar.co.il">Link</a>
</div>
<div id="test16">
<!-- Host with four subdomain labels (ab.cd.ef.gh) in front of foobar.com. -->
<a href="https://ab.cd.ef.gh.foobar.com/">URL with a long subdomain</a>
</div>
<div id="test17">
<!-- Three hosts that all end in foobar.com: bare, www-prefixed, and with a two-label subdomain. -->
<h5>URL with the same top level domain.</h5>
<a href="https://foobar.com/">Link</a>
<a href="https://www.foobar.com/">Link</a>
<a href="https://abc.def.foobar.com/">Link</a>
</div>
<div id="test18">
<!-- Twelve anchors: foobar1.com appears twice, followed by foobar2.com through foobar11.com. The last anchor is labelled as falling outside the threshold. -->
<!-- NOTE(review): the threshold value is not defined in this file; confirm it against the code that consumes this fixture. -->
<h5>More than the threshold of links.</h5>
<a href="https://foobar1.com/">Link</a>
<a href="https://foobar1.com/">Duplicate Link</a>
<a href="https://foobar2.com/">Link</a>
<a href="https://foobar3.com/">Link</a>
<a href="https://foobar4.com/">Link</a>
<a href="https://foobar5.com/">Link</a>
<a href="https://foobar6.com/">Link</a>
<a href="https://foobar7.com/">Link</a>
<a href="https://foobar8.com/">Link</a>
<a href="https://foobar9.com/">Link</a>
<a href="https://foobar10.com/">Link</a>
<a href="https://foobar11.com/">Link Outside Threshold</a>
</div>
<div id="test19">
<!-- Nine anchors with class foo (foobar1.com through foobar9.com) followed by two with class baz (foobaz1.com, foobaz2.com). The second baz anchor is labelled as falling outside the threshold. -->
<h5>More than the threshold of links using multiple matching selectors.</h5>
<a class="foo" href="https://foobar1.com/">Link</a>
<a class="foo" href="https://foobar2.com/">Link</a>
<a class="foo" href="https://foobar3.com/">Link</a>
<a class="foo" href="https://foobar4.com/">Link</a>
<a class="foo" href="https://foobar5.com/">Link</a>
<a class="foo" href="https://foobar6.com/">Link</a>
<a class="foo" href="https://foobar7.com/">Link</a>
<a class="foo" href="https://foobar8.com/">Link</a>
<a class="foo" href="https://foobar9.com/">Link</a>
<a class="baz" href="https://foobaz1.com/">Link</a>
<a class="baz" href="https://foobaz2.com/">Link Outside Threshold</a>
</div>
<div id="test20">
<!-- The domain appears as the text content of a cite element nested in #b_results > .b_algo > .b_attribution; the text is an absolute https URL. -->
<div id="b_results">
<div class="b_algo">
<div class="b_attribution">
<cite>https://organic.com</cite>
</div>
</div>
</div>
</div>
<div id="test21">
<!-- Same nesting as the organic cite case, but the middle container has class b_ad and the cite text is https://sponsored.com. -->
<div id="b_results">
<div class="b_ad">
<div class="b_attribution">
<cite>https://sponsored.com</cite>
</div>
</div>
</div>
</div>
<div id="test22">
<!-- A cite element directly inside .adsMvCarousel whose text is free-form wording rather than a URL or host. -->
<div class="adsMvCarousel">
<cite>Fixed up from the carousel</cite>
</div>
</div>
<div id="test23">
<!-- A cite element directly inside an aside whose text is free-form wording rather than a URL or host. -->
<aside>
<cite>Fixed up from the sidebar</cite>
</aside>
</div>
<div id="test24">
<!-- Eleven #b_results > .b_ad > .b_attribution groups. The first nine each hold a cite with https://sponsored1.com through https://sponsored9.com; the last two have no cite child. -->
<!-- NOTE(review): id="b_results" is repeated across these groups, so ids are not unique here; presumably deliberate for the fixture, confirm before changing. -->
<!-- NOTE(review): confirm the two trailing groups are meant to be empty and did not lose a cite element. -->
<h5>More than the threshold of links using the text content selection method.</h5>
<div id="b_results">
<div class="b_ad">
<div class="b_attribution">
<cite>https://sponsored1.com</cite>
</div>
</div>
</div>
<div id="b_results">
<div class="b_ad">
<div class="b_attribution">
<cite>https://sponsored2.com</cite>
</div>
</div>
</div>
<div id="b_results">
<div class="b_ad">
<div class="b_attribution">
<cite>https://sponsored3.com</cite>
</div>
</div>
</div>
<div id="b_results">
<div class="b_ad">
<div class="b_attribution">
<cite>https://sponsored4.com</cite>
</div>
</div>
</div>
<div id="b_results">
<div class="b_ad">
<div class="b_attribution">
<cite>https://sponsored5.com</cite>
</div>
</div>
</div>
<div id="b_results">
<div class="b_ad">
<div class="b_attribution">
<cite>https://sponsored6.com</cite>
</div>
</div>
</div>
<div id="b_results">
<div class="b_ad">
<div class="b_attribution">
<cite>https://sponsored7.com</cite>
</div>
</div>
</div>
<div id="b_results">
<div class="b_ad">
<div class="b_attribution">
<cite>https://sponsored8.com</cite>
</div>
</div>
</div>
<div id="b_results">
<div class="b_ad">
<div class="b_attribution">
<cite>https://sponsored9.com</cite>
</div>
</div>
</div>
<div id="b_results">
<div class="b_ad">
<div class="b_attribution">
</div>
</div>
</div>
<div id="b_results">
<div class="b_ad">
<div class="b_attribution">
</div>
</div>
</div>
</div>
<div id="test25">
<!-- Organic cite case where the cite text is a bare host (organic.com) with no scheme. -->
<div id="b_results">
<div class="b_algo">
<div class="b_attribution">
<cite>organic.com</cite>
</div>
</div>
</div>
</div>
<div id="test26">
<!-- #b_results > .b_algo > .b_attribution with no cite child at all. -->
<!-- NOTE(review): this markup is identical to #test27 as it stands; confirm whether the two cases were meant to differ. -->
<div id="b_results">
<div class="b_algo">
<div class="b_attribution">
</div>
</div>
</div>
</div>
<div id="test27">
<!-- #b_results > .b_algo > .b_attribution with no cite child at all. -->
<!-- NOTE(review): this markup is identical to #test26 as it stands; confirm whether the two cases were meant to differ. -->
<div id="b_results">
<div class="b_algo">
<div class="b_attribution">
</div>
</div>
</div>
</div>
<div id="test28">
<!-- The attribution holds a span with the text HTTPS as a sibling before the cite; the cite text is a host plus path with no scheme. -->
<div id="b_results">
<div class="b_algo">
<div class="b_attribution">
<span>HTTPS</span>
<cite>en.wikipedia.org/wiki/Cat</cite>
</div>
</div>
</div>
</div>
</div>
</body>
</html>