curl --request GET \
--url 'https://scrape.abstractapi.com/v1/?api_key=YOUR_UNIQUE_API_KEY&url=https://news.ycombinator.com'
"<html lang="en" op="news"><head><meta name="referrer" content="origin"><meta name="viewport" content="width=device-width, initial-scale=1.0"><link rel="stylesheet" type="text/css" href="news.css?ZjsaulklTz22B6Rfir3c">
<link rel="shortcut icon" href="favicon.ico">
<link rel="alternate" type="application/rss+xml" title="RSS" href="rss">
<title>Hacker News</title></head><body><center><table id="hnmain" border="0" cellpadding="0" cellspacing="0" width="85%" bgcolor="#f6f6ef">
<tr><td bgcolor="#ff6600"><table border="0" cellpadding="0" cellspacing="0" width="100%" style="padding:2px"><tr><td style="width:18px;padding-right:4px"><a href="https://news.ycombinator.com"><img src="y18.svg" width="18" height="18" style="border:1px white solid; display:block"></a></td>
<td style="line-height:12pt; height:10px;"><span class="pagetop"><b class="hnname"><a href="news">Hacker News</a></b>
<a href="newest">new</a> | <a href="front">past</a> | <a href="newcomments">comments</a> | <a href="ask">ask</a> | <a href="show">show</a> | <a href="jobs">jobs</a> | <a href="submit">submit</a> </span></td><td style="text-align:right;padding-right:4px;"><span class="pagetop">
<a href="login?goto=news">login</a>
</span></td>
</tr></table></td></tr>
<tr id="pagespace" title="" style="height:10px"></tr><tr><td>
<table border="0" cellpadding="0" cellspacing="0">
<tr class='athing' id='36478206'>
<td align="right" valign="top" class="title"><span class="rank">1.</span></td> <td valign="top" class="votelinks"><center><a id='up_36478206'href='vote?id=36478206&how=up&goto=news'><div class='votearrow' title='upvote'></div></a></center></td><td class="title"><span class="titleline"><a href="https://matan-h.com/google-has-a-secret-browser-hidden-inside-the-settings/" rel="noreferrer">Google has a secret browser hidden inside the settings</a><span class="sitebit comhead"> (<a href="from?site=matan-h.com"><span class="sitestr">matan-h.com</span></a>)</span></span></td></tr><tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_36478206">412 points</span> by <a href="user?id=matan-h" class="hnuser">matan-h</a> <span class="age" title="2023-06-26T11:13:03"><a href="item?id=36478206">3 hours ago</a></span> <span id="unv_36478206"></span> | <a href="hide?id=36478206&goto=news">hide</a> | <a href="item?id=36478206">140 comments</a> </span>
</td></tr>
<tr class="spacer" style="height:5px"></tr>
<tr class='athing' id='36478892'>
<td align="right" valign="top" class="title"><span class="rank">2.</span></td> <td valign="top" class="votelinks"><center><a id='up_36478892'href='vote?id=36478892&how=up&goto=news'><div class='votearrow' title='upvote'></div></a></center></td><td class="title"><span class="titleline"><a href="https://faultlore.com/blah/text-hates-you/" rel="noreferrer">Text Rendering Hates You (2019)</a><span class="sitebit comhead"> (<a href="from?site=faultlore.com"><span class="sitestr">faultlore.com</span></a>)</span></span></td></tr><tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_36478892">96 points</span> by <a href="user?id=subset" class="hnuser">subset</a> <span class="age" title="2023-06-26T12:32:55"><a href="item?id=36478892">2 hours ago</a></span> <span id="unv_36478892"></span> | <a href="hide?id=36478892&goto=news">hide</a> | <a href="item?id=36478892">32 comments</a> </span>
</td></tr>
<tr class="spacer" style="height:5px"></tr>
<tr class='athing' id='36479387'>
<td align="right" valign="top" class="title"><span class="rank">3.</span></td> <td valign="top" class="votelinks"><center><a id='up_36479387'href='vote?id=36479387&how=up&goto=news'><div class='votearrow' title='upvote'></div></a></center></td><td class="title"><span class="titleline"><a href="https://www.johndcook.com/blog/2023/06/23/every-factorial-is-a-power/" rel="noreferrer">Every factorial is a power</a><span class="sitebit comhead"> (<a href="from?site=johndcook.com"><span class="sitestr">johndcook.com</span></a>)</span></span></td></tr><tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_36479387">41 points</span> by <a href="user?id=warrenm" class="hnuser">warrenm</a> <span class="age" title="2023-06-26T13:24:53"><a href="item?id=36479387">1 hour ago</a></span> <span id="unv_36479387"></span> | <a href="hide?id=36479387&goto=news">hide</a> | <a href="item?id=36479387">22 comments</a> </span>
</td></tr>
............
<tr class="spacer" style="height:5px"></tr>
<tr class='athing' id='36469297'>
<td align="right" valign="top" class="title"><span class="rank">30.</span></td> <td valign="top" class="votelinks"><center><a id='up_36469297'href='vote?id=36469297&how=up&goto=news'><div class='votearrow' title='upvote'></div></a></center></td><td class="title"><span class="titleline"><a href="https://github.com/TeamHypersomnia/Hypersomnia">Show HN: Open-source shooter which made it to AC: Valhalla and Skydio drones</a><span class="sitebit comhead"> (<a href="from?site=github.com/teamhypersomnia"><span class="sitestr">github.com/teamhypersomnia</span></a>)</span></span></td></tr><tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_36469297">251 points</span> by <a href="user?id=geneotech" class="hnuser">geneotech</a> <span class="age" title="2023-06-25T15:34:42"><a href="item?id=36469297">18 hours ago</a></span> <span id="unv_36469297"></span> | <a href="hide?id=36469297&goto=news">hide</a> | <a href="item?id=36469297">35 comments</a> </span>
</td></tr>
<tr class="spacer" style="height:5px"></tr>
<tr class="morespace" style="height:10px"></tr><tr><td colspan="2"></td>
<td class='title'><a href='?p=2' class='morelink' rel='next'>More</a></td> </tr>
</table>
</td></tr>
<tr><td><img src="s.gif" height="10" width="0"><table width="100%" cellspacing="0" cellpadding="1"><tr><td bgcolor="#ff6600"></td></tr></table><br>
<center><span class="yclinks"><a href="newsguidelines.html">Guidelines</a> | <a href="newsfaq.html">FAQ</a> | <a href="lists">Lists</a> | <a href="https://github.com/HackerNews/API">API</a> | <a href="security.html">Security</a> | <a href="https://www.ycombinator.com/legal/">Legal</a> | <a href="https://www.ycombinator.com/apply/">Apply to YC</a> | <a href="mailto:hn@ycombinator.com">Contact</a></span><br><br>
<form method="get" action="//hn.algolia.com/">Search: <input type="text" name="q" size="17" autocorrect="off" spellcheck="false" autocapitalize="off" autocomplete="false"></form></center></td></tr> </table></center></body>
<script type='text/javascript' src='hn.js?ZjsaulklTz22B6Rfir3c'></script>
</html>"
Abstract’s Web Scraping API is a simple yet powerful REST API used to extract data from a given URL. To make a request, you simply include the target URL and your API key, and Abstract’s API will return the data from that site.
curl --request GET \
--url 'https://scrape.abstractapi.com/v1/?api_key=YOUR_UNIQUE_API_KEY&url=https://news.ycombinator.com'
"<html lang="en" op="news"><head><meta name="referrer" content="origin"><meta name="viewport" content="width=device-width, initial-scale=1.0"><link rel="stylesheet" type="text/css" href="news.css?ZjsaulklTz22B6Rfir3c">
<link rel="shortcut icon" href="favicon.ico">
<link rel="alternate" type="application/rss+xml" title="RSS" href="rss">
<title>Hacker News</title></head><body><center><table id="hnmain" border="0" cellpadding="0" cellspacing="0" width="85%" bgcolor="#f6f6ef">
<tr><td bgcolor="#ff6600"><table border="0" cellpadding="0" cellspacing="0" width="100%" style="padding:2px"><tr><td style="width:18px;padding-right:4px"><a href="https://news.ycombinator.com"><img src="y18.svg" width="18" height="18" style="border:1px white solid; display:block"></a></td>
<td style="line-height:12pt; height:10px;"><span class="pagetop"><b class="hnname"><a href="news">Hacker News</a></b>
<a href="newest">new</a> | <a href="front">past</a> | <a href="newcomments">comments</a> | <a href="ask">ask</a> | <a href="show">show</a> | <a href="jobs">jobs</a> | <a href="submit">submit</a> </span></td><td style="text-align:right;padding-right:4px;"><span class="pagetop">
<a href="login?goto=news">login</a>
</span></td>
</tr></table></td></tr>
<tr id="pagespace" title="" style="height:10px"></tr><tr><td>
<table border="0" cellpadding="0" cellspacing="0">
<tr class='athing' id='36478206'>
<td align="right" valign="top" class="title"><span class="rank">1.</span></td> <td valign="top" class="votelinks"><center><a id='up_36478206'href='vote?id=36478206&how=up&goto=news'><div class='votearrow' title='upvote'></div></a></center></td><td class="title"><span class="titleline"><a href="https://matan-h.com/google-has-a-secret-browser-hidden-inside-the-settings/" rel="noreferrer">Google has a secret browser hidden inside the settings</a><span class="sitebit comhead"> (<a href="from?site=matan-h.com"><span class="sitestr">matan-h.com</span></a>)</span></span></td></tr><tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_36478206">412 points</span> by <a href="user?id=matan-h" class="hnuser">matan-h</a> <span class="age" title="2023-06-26T11:13:03"><a href="item?id=36478206">3 hours ago</a></span> <span id="unv_36478206"></span> | <a href="hide?id=36478206&goto=news">hide</a> | <a href="item?id=36478206">140 comments</a> </span>
</td></tr>
<tr class="spacer" style="height:5px"></tr>
<tr class='athing' id='36478892'>
<td align="right" valign="top" class="title"><span class="rank">2.</span></td> <td valign="top" class="votelinks"><center><a id='up_36478892'href='vote?id=36478892&how=up&goto=news'><div class='votearrow' title='upvote'></div></a></center></td><td class="title"><span class="titleline"><a href="https://faultlore.com/blah/text-hates-you/" rel="noreferrer">Text Rendering Hates You (2019)</a><span class="sitebit comhead"> (<a href="from?site=faultlore.com"><span class="sitestr">faultlore.com</span></a>)</span></span></td></tr><tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_36478892">96 points</span> by <a href="user?id=subset" class="hnuser">subset</a> <span class="age" title="2023-06-26T12:32:55"><a href="item?id=36478892">2 hours ago</a></span> <span id="unv_36478892"></span> | <a href="hide?id=36478892&goto=news">hide</a> | <a href="item?id=36478892">32 comments</a> </span>
</td></tr>
<tr class="spacer" style="height:5px"></tr>
<tr class='athing' id='36479387'>
<td align="right" valign="top" class="title"><span class="rank">3.</span></td> <td valign="top" class="votelinks"><center><a id='up_36479387'href='vote?id=36479387&how=up&goto=news'><div class='votearrow' title='upvote'></div></a></center></td><td class="title"><span class="titleline"><a href="https://www.johndcook.com/blog/2023/06/23/every-factorial-is-a-power/" rel="noreferrer">Every factorial is a power</a><span class="sitebit comhead"> (<a href="from?site=johndcook.com"><span class="sitestr">johndcook.com</span></a>)</span></span></td></tr><tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_36479387">41 points</span> by <a href="user?id=warrenm" class="hnuser">warrenm</a> <span class="age" title="2023-06-26T13:24:53"><a href="item?id=36479387">1 hour ago</a></span> <span id="unv_36479387"></span> | <a href="hide?id=36479387&goto=news">hide</a> | <a href="item?id=36479387">22 comments</a> </span>
</td></tr>
............
<tr class="spacer" style="height:5px"></tr>
<tr class='athing' id='36469297'>
<td align="right" valign="top" class="title"><span class="rank">30.</span></td> <td valign="top" class="votelinks"><center><a id='up_36469297'href='vote?id=36469297&how=up&goto=news'><div class='votearrow' title='upvote'></div></a></center></td><td class="title"><span class="titleline"><a href="https://github.com/TeamHypersomnia/Hypersomnia">Show HN: Open-source shooter which made it to AC: Valhalla and Skydio drones</a><span class="sitebit comhead"> (<a href="from?site=github.com/teamhypersomnia"><span class="sitestr">github.com/teamhypersomnia</span></a>)</span></span></td></tr><tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_36469297">251 points</span> by <a href="user?id=geneotech" class="hnuser">geneotech</a> <span class="age" title="2023-06-25T15:34:42"><a href="item?id=36469297">18 hours ago</a></span> <span id="unv_36469297"></span> | <a href="hide?id=36469297&goto=news">hide</a> | <a href="item?id=36469297">35 comments</a> </span>
</td></tr>
<tr class="spacer" style="height:5px"></tr>
<tr class="morespace" style="height:10px"></tr><tr><td colspan="2"></td>
<td class='title'><a href='?p=2' class='morelink' rel='next'>More</a></td> </tr>
</table>
</td></tr>
<tr><td><img src="s.gif" height="10" width="0"><table width="100%" cellspacing="0" cellpadding="1"><tr><td bgcolor="#ff6600"></td></tr></table><br>
<center><span class="yclinks"><a href="newsguidelines.html">Guidelines</a> | <a href="newsfaq.html">FAQ</a> | <a href="lists">Lists</a> | <a href="https://github.com/HackerNews/API">API</a> | <a href="security.html">Security</a> | <a href="https://www.ycombinator.com/legal/">Legal</a> | <a href="https://www.ycombinator.com/apply/">Apply to YC</a> | <a href="mailto:hn@ycombinator.com">Contact</a></span><br><br>
<form method="get" action="//hn.algolia.com/">Search: <input type="text" name="q" size="17" autocorrect="off" spellcheck="false" autocapitalize="off" autocomplete="false"></form></center></td></tr> </table></center></body>
<script type='text/javascript' src='hn.js?ZjsaulklTz22B6Rfir3c'></script>
</html>"
api_key and a target URL, and the API will return the data from that site.
https://scrape.abstractapi.com/v1/
api_key and the target URL you’d like to scrape:
https://scrape.abstractapi.com/v1/?api_key=YOUR_UNIQUE_API_KEY&url=https://news.ycombinator.com
"<html lang="en" op="news"><head><meta name="referrer" content="origin"><meta name="viewport" content="width=device-width, initial-scale=1.0"><link rel="stylesheet" type="text/css" href="news.css?ZjsaulklTz22B6Rfir3c">
<link rel="shortcut icon" href="favicon.ico">
<link rel="alternate" type="application/rss+xml" title="RSS" href="rss">
<title>Hacker News</title></head><body><center><table id="hnmain" border="0" cellpadding="0" cellspacing="0" width="85%" bgcolor="#f6f6ef">
<tr><td bgcolor="#ff6600"><table border="0" cellpadding="0" cellspacing="0" width="100%" style="padding:2px"><tr><td style="width:18px;padding-right:4px"><a href="https://news.ycombinator.com"><img src="y18.svg" width="18" height="18" style="border:1px white solid; display:block"></a></td>
<td style="line-height:12pt; height:10px;"><span class="pagetop"><b class="hnname"><a href="news">Hacker News</a></b>
<a href="newest">new</a> | <a href="front">past</a> | <a href="newcomments">comments</a> | <a href="ask">ask</a> | <a href="show">show</a> | <a href="jobs">jobs</a> | <a href="submit">submit</a> </span></td><td style="text-align:right;padding-right:4px;"><span class="pagetop">
<a href="login?goto=news">login</a>
</span></td>
</tr></table></td></tr>
<tr id="pagespace" title="" style="height:10px"></tr><tr><td>
<table border="0" cellpadding="0" cellspacing="0">
<tr class='athing' id='36478206'>
<td align="right" valign="top" class="title"><span class="rank">1.</span></td> <td valign="top" class="votelinks"><center><a id='up_36478206'href='vote?id=36478206&how=up&goto=news'><div class='votearrow' title='upvote'></div></a></center></td><td class="title"><span class="titleline"><a href="https://matan-h.com/google-has-a-secret-browser-hidden-inside-the-settings/" rel="noreferrer">Google has a secret browser hidden inside the settings</a><span class="sitebit comhead"> (<a href="from?site=matan-h.com"><span class="sitestr">matan-h.com</span></a>)</span></span></td></tr><tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_36478206">412 points</span> by <a href="user?id=matan-h" class="hnuser">matan-h</a> <span class="age" title="2023-06-26T11:13:03"><a href="item?id=36478206">3 hours ago</a></span> <span id="unv_36478206"></span> | <a href="hide?id=36478206&goto=news">hide</a> | <a href="item?id=36478206">140 comments</a> </span>
</td></tr>
<tr class="spacer" style="height:5px"></tr>
<tr class='athing' id='36478892'>
<td align="right" valign="top" class="title"><span class="rank">2.</span></td> <td valign="top" class="votelinks"><center><a id='up_36478892'href='vote?id=36478892&how=up&goto=news'><div class='votearrow' title='upvote'></div></a></center></td><td class="title"><span class="titleline"><a href="https://faultlore.com/blah/text-hates-you/" rel="noreferrer">Text Rendering Hates You (2019)</a><span class="sitebit comhead"> (<a href="from?site=faultlore.com"><span class="sitestr">faultlore.com</span></a>)</span></span></td></tr><tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_36478892">96 points</span> by <a href="user?id=subset" class="hnuser">subset</a> <span class="age" title="2023-06-26T12:32:55"><a href="item?id=36478892">2 hours ago</a></span> <span id="unv_36478892"></span> | <a href="hide?id=36478892&goto=news">hide</a> | <a href="item?id=36478892">32 comments</a> </span>
</td></tr>
<tr class="spacer" style="height:5px"></tr>
<tr class='athing' id='36479387'>
<td align="right" valign="top" class="title"><span class="rank">3.</span></td> <td valign="top" class="votelinks"><center><a id='up_36479387'href='vote?id=36479387&how=up&goto=news'><div class='votearrow' title='upvote'></div></a></center></td><td class="title"><span class="titleline"><a href="https://www.johndcook.com/blog/2023/06/23/every-factorial-is-a-power/" rel="noreferrer">Every factorial is a power</a><span class="sitebit comhead"> (<a href="from?site=johndcook.com"><span class="sitestr">johndcook.com</span></a>)</span></span></td></tr><tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_36479387">41 points</span> by <a href="user?id=warrenm" class="hnuser">warrenm</a> <span class="age" title="2023-06-26T13:24:53"><a href="item?id=36479387">1 hour ago</a></span> <span id="unv_36479387"></span> | <a href="hide?id=36479387&goto=news">hide</a> | <a href="item?id=36479387">22 comments</a> </span>
</td></tr>
............
<tr class="spacer" style="height:5px"></tr>
<tr class='athing' id='36469297'>
<td align="right" valign="top" class="title"><span class="rank">30.</span></td> <td valign="top" class="votelinks"><center><a id='up_36469297'href='vote?id=36469297&how=up&goto=news'><div class='votearrow' title='upvote'></div></a></center></td><td class="title"><span class="titleline"><a href="https://github.com/TeamHypersomnia/Hypersomnia">Show HN: Open-source shooter which made it to AC: Valhalla and Skydio drones</a><span class="sitebit comhead"> (<a href="from?site=github.com/teamhypersomnia"><span class="sitestr">github.com/teamhypersomnia</span></a>)</span></span></td></tr><tr><td colspan="2"></td><td class="subtext"><span class="subline">
<span class="score" id="score_36469297">251 points</span> by <a href="user?id=geneotech" class="hnuser">geneotech</a> <span class="age" title="2023-06-25T15:34:42"><a href="item?id=36469297">18 hours ago</a></span> <span id="unv_36469297"></span> | <a href="hide?id=36469297&goto=news">hide</a> | <a href="item?id=36469297">35 comments</a> </span>
</td></tr>
<tr class="spacer" style="height:5px"></tr>
<tr class="morespace" style="height:10px"></tr><tr><td colspan="2"></td>
<td class='title'><a href='?p=2' class='morelink' rel='next'>More</a></td> </tr>
</table>
</td></tr>
<tr><td><img src="s.gif" height="10" width="0"><table width="100%" cellspacing="0" cellpadding="1"><tr><td bgcolor="#ff6600"></td></tr></table><br>
<center><span class="yclinks"><a href="newsguidelines.html">Guidelines</a> | <a href="newsfaq.html">FAQ</a> | <a href="lists">Lists</a> | <a href="https://github.com/HackerNews/API">API</a> | <a href="security.html">Security</a> | <a href="https://www.ycombinator.com/legal/">Legal</a> | <a href="https://www.ycombinator.com/apply/">Apply to YC</a> | <a href="mailto:hn@ycombinator.com">Contact</a></span><br><br>
<form method="get" action="//hn.algolia.com/">Search: <input type="text" name="q" size="17" autocorrect="off" spellcheck="false" autocapitalize="off" autocomplete="false"></form></center></td></tr> </table></center></body>
<script type='text/javascript' src='hn.js?ZjsaulklTz22B6Rfir3c'></script>
</html>"
& character would be encoded to %26.
[
{
"path": "path",
"value": "value",
"name": "name",
"domain": "domain"
},
{
"path": "path",
"value": "value",
"name": "name",
"domain": "domain"
}
]