How to make a good scraper better
While this window is showing instructions, the user interface of OutWit Hub remains operational.
You can still interact normally with the application and you can move this tutorial window around on the screen to better see the parts of the interface that you want.
Here is the sample data
In the first tutorial about making a scraper, we saw how to extract the population data on this page.
Say we want more detail than this:
The country should be in a separate column. We may also want to split the coordinates into two different fields. And finally, a piece of information on the page has simply been lost: the continent.
We can grab all this and more.
Splitting City and Country
City and country are separated by a comma. We can use this separator to split them apart.
... Voilà! City and Country are now in two separate columns.
IMPORTANT NOTE: "City" is the first field. It will be considered the record delimiter by OutWit Hub. This means that if you had chosen another field as the delimiter, the data rows might have been cut in the middle. To make sure the scraper "wraps" the data rows as it should, try to follow the order in which the data elements appear in the source code when you build your scraper.
Splitting the Coordinates
Latitude and longitude are also separated by a comma. Let's use the Separator field the same way we just did.
Our data is now extracted into five separate columns, as we wanted.
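The splitting the Separator field performs can be pictured in a few lines of Python. This is only an illustrative sketch, not OutWit Hub's actual code, and the sample rows below are hypothetical placeholders for the scraped data:

```python
# Hypothetical rows, roughly as the scraper sees them before splitting:
rows = [
    "Tokyo, Japan | 35.68, 139.69",
    "Paris, France | 48.85, 2.35",
]

records = []
for row in rows:
    place, coords = row.split(" | ")
    # Splitting on the comma separator, as in the City/Country step:
    city, country = [s.strip() for s in place.split(",")]
    # The same separator trick applied to the coordinates:
    latitude, longitude = [s.strip() for s in coords.split(",")]
    records.append((city, country, latitude, longitude))
```

Each record now carries city, country, latitude, and longitude as separate fields, which is exactly what the two Separator entries achieve in the scraper.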
The only one missing now is the continent...
The continent is at a different (higher) level in the HTML list. This is why we couldn't grab it until now.
The continent names are not repeated on every row. They are on the first hierarchical level of the HTML list.
This means that the scraper, which has only one level of records, must create another field for this piece of information and repeat it in every data row.
(Aren't we lucky?)
Directives are additional commands that let you alter the scraper's normal behaviour.
A directive must be entered in the Description field and surrounded with pound signs (see help). By typing #repeat#myFieldName in the description of a scraper line, you can add the data scraped by this line to an additional column named 'myFieldName' for each grabbed record.
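The effect of the #repeat# directive can be sketched in Python. This is only an illustrative model (not OutWit Hub's implementation), with hypothetical city and continent values: a value captured on one scraper line is remembered and copied into every record grabbed afterwards, until a new value replaces it.

```python
# Hypothetical scraper output, in source-code order: continent headings
# appear once, at a higher level than the city rows beneath them.
lines = [
    ("continent", "Asia"),
    ("city", "Tokyo"),
    ("city", "Shanghai"),
    ("continent", "Europe"),
    ("city", "Paris"),
]

repeated = None  # the value the #repeat# directive carries forward
records = []
for kind, value in lines:
    if kind == "continent":
        repeated = value  # remembered, not emitted as its own row
    else:
        records.append({"City": value, "Continent": repeated})
# Every city row now has a Continent column, even though the continent
# name appears only once in the page.
```

This is why the directive solves our problem: the continent, which sits on a higher level of the HTML list and is never repeated per row, still ends up filled in on every data row.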
Yesss!... This looks more like it.
The last elements we need to extract are the flags.
We have now added a new field to our scraper to extract the URL of the country flag for each city. The image URLs are located in img tags, and we just need to grab the string between the double quotes.
The image URLs are now scraped, but two problems remain:
1. the URLs are offset by one row.
2. they are "relative links": the first part of the URL is missing.
Solving problem #1 is easy: the flag URL is on the last line of our scraper, but it appears first in the page source code. Dragging the fifth line of the scraper up to the first position will solve this issue.
The Flag row is now first, which solves the offset problem.
As for the partial URL, the solution consists in adding a variable in the Replace field: #BASEURL# is the path to the current file (see help).
When typing a replacement value, \0 refers to the extracted data. So the replacement string #BASEURL#\0 means the concatenation of the path to the current page and the part of the URL that was extracted.
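The effect of the #BASEURL#\0 replacement can be pictured with Python's standard `urljoin`. The URLs below are hypothetical examples; the point is only to show how the page's path and the extracted fragment combine:

```python
from urllib.parse import urljoin

# Hypothetical values: base_url plays the role of #BASEURL# (the path to
# the current page), and extracted plays the role of \0 (the string
# captured by the scraper line).
base_url = "http://www.example.com/data/cities.html"
extracted = "images/flags/jp.png"

# #BASEURL#\0 prepends the page's path to the captured relative link:
absolute = urljoin(base_url, extracted)
# -> "http://www.example.com/data/images/flags/jp.png"
```

The relative link is resolved against the directory of the current page, turning the partial URL into a full, usable one.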
Here we are.
Congratulations!
Now, you can really be proud
of your first geeky scraper.
You can now export the results directly from the 'scraped' view or move them to your Catch and keep the data there until you decide what to do with it.
We will progressively publish other tutorials to lead you through the main features of OutWit Hub. Stay tuned.
You still haven't had enough of scrapers?... Try making your own and, if you feel a tutorial on a specific point is missing, don't hesitate to send us your suggestions.