WIZ-SCRAPER03.ALT

This is an OutWit Tutorial file.


Testing Browser...
OutWit Hub Pro - Calculations in Scrapers
Application walkthrough: using scraper functions
/**
 * Applies the preference values this tutorial runs with.
 *
 * Every value is written through wizardKit.setWizardPref(name, value).
 * Numeric-looking values ("2000", "3500", "1") are passed as strings, exactly
 * as in the original.
 *
 * The statements are laid out one per line on purpose: when the whole function
 * sits on a single line, the first `//` comment swallows every statement after
 * it, including the closing brace.
 */
userSpace.setWizardPrefs = function setWizardPrefs() {
  // Lower/upper bounds of the browsing tempo (presumably milliseconds - TODO confirm).
  wizardKit.setWizardPref("browse.tempo.min", "2000");
  wizardKit.setWizardPref("browse.tempo.max", "3500");

  wizardKit.setWizardPref("images.ondemandonly", false);
  // Deliberately left disabled in the original:
  // witscript.setPreference("page.ignorePlugins", false);
  wizardKit.setWizardPref("page.ignoreImages", false);

  wizardKit.setWizardPref("tableMinRows", "1");
  wizardKit.setWizardPref("DOMSourceDontWarn", true);
};


A Scraper With Totals


While this window is showing instructions, the user interface of OutWit Hub remains operational.

You can still interact normally with the application and you can move this tutorial window around on the screen to better see the parts of the interface that you want.

// Wizard start-up script: derives two capability flags from the kernel version
// and the user agent, refuses to run on unsupported browsers/kernels/licenses,
// then prepares the Hub window for the tutorial.
//
// User-agent strings noted by the original author:
//   Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:20.0) Gecko/20100101 Firefox/20.0
//   Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:19.0) Gecko/20130415 Firefox/19.0.2
//   Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:19.0) Gecko/20130416 OutWit-Hub/3.0.3.63
//
// NOTE(review): witscript is dereferenced on the next statement, i.e. before the
// `"witscript" in window` guard further down gets a chance to run. If witscript
// can really be absent, that guard is unreachable - confirm and reorder if so.

// waitOK: true on kernel 4, or whenever the browser is not Firefox 20-29.
userSpace.waitOK = witscript.version("4") || !/Firefox\/2\d\./.test(navigator.userAgent);

// eyeCatcherOK: false only on Mac + Firefox with rv:12 to rv:17.
userSpace.eyeCatcherOK = !(wizardKit.platform == "mac" && /firefox/i.test(navigator.userAgent) && /rv:1[2-7]/i.test(navigator.userAgent));

if (/Firefox\/2\d\./.test(navigator.userAgent) && !witscript.version("4")) {
  userSpace.eyeCatcherOK = false;
  // Firefox 20-29 without kernel 4: replace typeCellValue with a version that
  // sets the value and opens/closes the cell editor with no waits in between.
  wizardKit.typeCellValue = function typeCellValue(tree, row, column, value) {
    tree.setCellValue(row, column, value);
    tree.startEditing(row, column);
    tree.stopEditing(true);
  };
}

// Compatibility gate. Note that /Firefox\/[23]\./ matches Firefox 2.x and 3.x,
// not the 20-29 range tested above.
if (/Firefox\/[23]\./.test(navigator.userAgent)) {
  alert("OutWit wizards cannot run on your version of Firefox. Please update to the current version and try again.");
  wizard.close();
} else if (!("witscript" in window) || !witscript.version || !witscript.version("3")) {
  // The original literal was split by a raw line break (not valid inside a
  // quoted string); the break is kept as an explicit \n.
  alert("This wizard is not compatible with your version of the OutWit Kernel. \nPlease download the latest version (3.0.1 or higher)");
  wizard.close();
} else if (!witscript.proLicense()) {
  alert("Sorry, this wizard can only run with a Pro or Enterprise edition of OutWit Hub.");
  witscript.views.page.load('http://www.outwit.com/support/help/hub/tutorials/outwit-hub-tutorials.html');
} else if (!witscript.version("4.0.7.105") && !/^3\./.test(witscript.version())) {
  alert("Sorry, this wizard can only run with OutWit Hub version 4.0.7.105 or later.");
  witscript.views.page.load('http://www.outwit.com/support/help/hub/tutorials/outwit-hub-tutorials.html');
}

// NOTE(review): everything below runs unconditionally, including after a branch
// above has called wizard.close() - confirm that this is intended.
if (witscript.version("3")) {
  $(".owui-wizard-homelink").html("Hub Tutorials");
}

wizardKit.hideCatch();
wizardKit.hideLog();

// If the URL bar contains "earthquake", load the OutWit home page instead.
if (/earthquake/.test(witscript.toolbar.urlBar.getValue())) {
  witscript.views.page.load("http://www.outwit.com/");
}

// Disabled in the original: userSpace.storeOriginalPrefs();
userSpace.setWizardPrefs();
witscript.logPanel.setAttribute("height", 0);
// Step script: pass this step's parent node to wizardKit.say, then bring up
// the page view.
var stepNode = this.parentNode;
wizardKit.say(stepNode);

witscript.views.page.display();
// Step script: present the step, hide the catch and log panels, and make sure
// the page view is on the USGS earthquake map before displaying it.
var stepNode = this.parentNode;
wizardKit.say(stepNode);
wizardKit.hideCatch();
wizardKit.hideLog();

// Only navigate when the URL bar is not already on the map page.
var currentUrl = witscript.toolbar.urlBar.getValue();
var isOnQuakeMap = /earthquake.usgs.gov\/earthquakes\/map/.test(currentUrl);
if (!isOnQuakeMap) {
  witscript.views.page.load("http://earthquake.usgs.gov/earthquakes/map/");
}

witscript.views.page.display();
witscript.menutree.focus();



Today's Tremors Index
Earthquakes around the world


Please click on the list icon at the top right corner of the page, as indicated by the site's instructions. This U.S. Geological Survey page lists the main earthquakes that occurred in the world during the past 24 hours.

Let us make a synoptic dashboard with this data.

/**
 * Fills the scraper editor datasheet with the seven rows of the tutorial
 * scraper, then moves focus to the menu tree.
 *
 * For each row the same cells are written in the same order as the original
 * hand-unrolled code: column 1 (true), column 2, column 3, column 4 and, for
 * every row except the first, column 6.
 *
 * Column meaning (column 6 is the "replace" field per the tutorial text; the
 * others are presumably: 1 = row enabled, 2 = description, 3 = marker before,
 * 4 = marker after - TODO confirm against the scraper editor).
 */
userSpace.fillScraper = function () {
  var sheet = witscript.views.scrapers.editor.datasheet;

  // [column 2, column 3, column 4, column 6 (optional)]
  var scraperRows = [
    ["Page Title", '<title', "</title>"],
    ["CumulatedMagnitude", 'callout">', "</span>", "#SUM#"],
    ["Average", 'callout">', "</span>", "#AVERAGE#"],
    ["Maximum", 'callout">', "</span>", "#MAX#"],
    ["Number", 'callout">', "</span>", "#COUNT#"],
    ["Locations", 'header">', "</", "#CONCAT#"],
    ["Latest Event", 'subheader">', "</", "#MAX#"]
  ];

  for (var rowIndex = 0; rowIndex < scraperRows.length; rowIndex++) {
    var cells = scraperRows[rowIndex];
    sheet.setCellValue(rowIndex, 1, true);
    sheet.setCellValue(rowIndex, 2, cells[0]);
    sheet.setCellValue(rowIndex, 3, cells[1]);
    sheet.setCellValue(rowIndex, 4, cells[2]);
    if (cells.length > 3) {
      sheet.setCellValue(rowIndex, 6, cells[3]);
    }
  }

  witscript.menutree.focus();
};

// ---------------------------------------------------------------------------
// Step script: open the scrapers view, make sure a "Tutorial Earthquake
// Scraper" exists and is open in the editor, highlight the magnitude cells in
// the source, and fill the datasheet.
//
// Statement order is significant (clicks and waits drive the UI), so it is
// kept exactly as in the original. The original was collapsed in a way that
// let `//` comments swallow live statements; boundaries are restored here.
// ---------------------------------------------------------------------------
wizardKit.say(this.parentNode);
witscript.views.scrapers.display();

// Get to the scraper manager. The outer test checks that editor.isVisible
// exists; the `else` belongs to that outer test, as in the original.
if (witscript.views.scrapers.editor.isVisible) {
  if (witscript.views.scrapers.editor.isVisible()) {
    witscript.views.scrapers.editor.manageButton.click();
    if (userSpace.waitOK) {
      witscript.wait(7000, function () { return witscript.views.scrapers.manager.isVisible(); });
    }
  }
} else {
  witscript.views.scrapers.editor.saveButton.click();
  if (userSpace.waitOK) {
    witscript.wait(1000);
  }
  witscript.views.scrapers.editor.manageButton.click();
}

wizardKit.hideCatch();
wizardKit.hideLog();

// Make sure the earthquake map is the loaded page.
if (!(/earthquake.usgs.gov\/earthquakes\/map/.test(witscript.toolbar.urlBar.getValue()))) {
  witscript.views.page.load("http://earthquake.usgs.gov/earthquakes/map/");
  if (userSpace.waitOK) {
    witscript.wait(10000);
  }
}

// This click sat between two commented-out debug alerts in the original.
witscript.views.scrapers.editor.sourceSelector.DOM.click();

var currentAutomator = witscript.views.scrapers.manager.currentAutomator();
if (currentAutomator && currentAutomator.automatorId == -1) {
  // XXX seems outdated. Check and remove
  witscript.views.scrapers.editor.manageButton.click();
}

// Create the tutorial scraper unless the current one is the scraper this
// wizard created earlier (same remembered name, and it matches
// /Tutorial Earthquake/).
if (!currentAutomator || !userSpace.automatorName || userSpace.automatorName != currentAutomator.name || !(/Tutorial Earthquake/.test(currentAutomator.name))) {
  witscript.views.scrapers.manager.createAutomator("Tutorial Earthquake Scraper");
  // Bare `views` is used here as in the original (presumably an alias of
  // witscript.views - TODO confirm).
  userSpace.automatorName = views.scrapers.manager.currentAutomator().name;
  witscript.views.scrapers.editor.url.setValue("http://earthquake.usgs.gov/earthquakes/map/");
}

witscript.views.scrapers.manager.editButton.click();
if (userSpace.waitOK) {
  witscript.wait(1000);
}
if (userSpace.waitOK) {
  witscript.wait(1000, function () { return views.scrapers.source.isLoaded(); });
}
if (userSpace.waitOK) {
  witscript.wait(300);
}

witscript.views.scrapers.editor.display();
if (userSpace.waitOK) {
  witscript.wait(100);
}
witscript.views.scrapers.editor.datasheet.focus();
if (userSpace.waitOK) {
  witscript.wait(100);
}
witscript.views.scrapers.source.scrollToPercent(.4);
if (userSpace.waitOK) {
  witscript.wait(100);
}

// Reset highlighting, then highlight every class="mag" occurrence in the source.
witscript.views.scrapers.source.findBar.toggleHighlight(false);
witscript.views.scrapers.source.findBar.toggleHighlight(true, '/class="mag"/gi');
if (userSpace.waitOK) {
  witscript.wait(100);
}

userSpace.fillScraper();
if (userSpace.waitOK) {
  witscript.wait(1500);
}

Here is our scraper

The page contains a table of the latest events with their time, magnitude and location. Instead of grabbing all these elements, we want to display the sum, average and maximum of the magnitudes, the number of earthquakes, a list of the locations and the time of the latest event.

All this is done by placing the corresponding function in the replace field.

// Step script: refill the scraper datasheet, then click the editor's source
// selector.
userSpace.fillScraper();

var sourceSelector = witscript.views.scrapers.editor.sourceSelector;
sourceSelector.DOM.click();

This is an AJAX site and the data is dynamically added to the page. The source should therefore be set to dynamic (pale yellow background).

// Step script: hand this step's parent node to wizardKit.say (presumably so the wizard presents this step's text - confirm against wizardKit).
wizardKit.say(this.parentNode);

Applying the scraper

// Step script: refill the scraper datasheet, click the editor's Execute
// button, then return focus to the menu tree.
userSpace.fillScraper();

var executeButton = witscript.views.scrapers.editor.executeButton;
executeButton.click();

witscript.menutree.focus();

Instead of creating a new column for each occurrence, the #SUM#, #COUNT#, #MAX#, #MIN#, #AVERAGE# and #CONCAT# functions placed in the replace field consolidate all the matches into a single computed cell.

// Step script: hand this step's parent node to wizardKit.say (presumably so the wizard presents this step's text - confirm against wizardKit).
wizardKit.say(this.parentNode);
// Final step script: reads the computed value from the scraped datasheet,
// turns it into the sentence shown in the #WTI element, presents the step and
// relabels the wizard's home link.
//
// Differences from the collapsed one-line original:
//   * statement boundaries are restored (a leading `//` comment swallowed the
//     whole line) and the missing semicolon after the #WTI update is added;
//   * the scraped value is written with .text() instead of .html(), because it
//     comes from a remote page and must not be parsed as markup;
//   * the "starts with Click" test uses the same regex everywhere;
//   * earlier commented-out attempts (reading columns 5 and 6 directly) and a
//     debug alert are dropped.

userSpace.WTI = views.scraped.datasheet.getCell(0, 7);

// Nothing scraped yet, or the cell holds text starting with "Click":
// run the scraper once more and read the cell again.
if (!userSpace.WTI || /^Click/.test(userSpace.WTI)) {
  witscript.views.scrapers.editor.executeButton.click();
  if (userSpace.waitOK) {
    witscript.wait(3000);
  }
  userSpace.WTI = views.scraped.datasheet.getCell(0, 7);
}

if (userSpace.WTI && !/^Click/.test(userSpace.WTI) && !/day/i.test(userSpace.WTI)) {
  // Column 7 holds a usable value: drop everything from the first "." onwards.
  userSpace.WTI = "The Tremor Index is " + userSpace.WTI.replace(/\.[\s\S]+$/, "");
} else if ((witscript.version("4") || /day/i.test(userSpace.WTI)) && views.scraped.datasheet.getCell(0, 6)) {
  // On kernel 4, or when column 7 contains "day", use column 6 instead.
  userSpace.WTI = views.scraped.datasheet.getCell(0, 6);
  userSpace.WTI = userSpace.WTI
    ? "The Tremor Index is " + userSpace.WTI.replace(/\.[\s\S]+$/, "")
    : "Click on 'Back' to reapply the scraper.";
} else if (/^Click/.test(userSpace.WTI)) {
  userSpace.WTI = "";
} else {
  userSpace.WTI = "Click on 'Back' to reapply the scraper.";
}

// Plain-text insertion: the value originates from a scraped web page.
$("#WTI").text(userSpace.WTI);
wizardKit.say(this.parentNode);

// Disabled in the original: wizardKit.restoreOriginalPrefs();
// XXX JC: This should not be here. Move to the close button (or event)
witscript.menutree.focus();
$(".owui-wizard-homelink").attr("style", "color: #DFFFF9 !important; float:left;").html("More Tutorials");


With these functions and the many others you will find in the help center, you can feed Excel spreadsheets, databases or websites with readily usable data.

We will publish other tutorials to lead you through the main features of OutWit Hub. Stay tuned.