Skip to content

Commit

Permalink
Merge pull request #72 from bertolo1988/developing
Browse files Browse the repository at this point in the history
Developing
  • Loading branch information
bertolo1988 authored Nov 2, 2016
2 parents a15633d + 3de8e92 commit 765456b
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 61 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ let defaultOptions = {
targetDate: Utils.getDefaultDateString(defaultDateFormat),
database: 'localhost:27017/flight-scrappper',
collection: 'flight-data',
timeout: 50000,
timeout: 80000,
browser: 'chrome',
chromedriverArgs: [],
maximize: false,
Expand Down
4 changes: 2 additions & 2 deletions flight-scrappper-app.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ let debug = require('debug')('flight-scrappper-app.js');
let Utils = require('./src/utils');

let options = {
periods: 3,
periods: 40,
interval: 48,
currency: 'EUR',
directFlight: false,
dateFormat: 'DD-MM-YYYY',
targetDate: Utils.getDefaultDateString('DD-MM-YYYY'),
database: 'localhost:27017/flight-scrappper',
collection: 'flight-data',
timeout: 50000,
timeout: 80000,
browser: 'chrome',
chromedriverArgs: ['--verbose', '--log-path=chromedriver.log'],
maximize: false,
Expand Down
8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "flight-scrappper",
"version": "2.3.1",
"version": "2.3.2",
"description": "Web scraper made with nodejs and selenium-webdriver that gathers flight data and stores it in a mongodb database",
"homepage": "https://github.com/bertolo1988/flight-scrappper",
"author": "bertolo1988 <tiagobertolo@gmail.com> (https://github.com/bertolo1988)",
Expand All @@ -13,7 +13,7 @@
],
"main": "dist/flight-scrappper.js",
"engines": {
"node": "*"
"node": ">= 6.5.0"
},
"repository": {
"type": "git",
Expand All @@ -24,14 +24,14 @@
},
"license": "MIT",
"dependencies": {
"chromedriver": "2.24.1",
"chromedriver": "2.25.1",
"cross-env": "3.1.3",
"debug": "2.2.0",
"file-system": "2.2.1",
"moment": "2.15.2",
"mongodb": "2.2.11",
"progress-barzz": "1.0.2",
"selenium-webdriver": "2.53.2"
"selenium-webdriver": "3.0.0-beta-3"
},
"devDependencies": {
"jshint": "2.9.4",
Expand Down
96 changes: 44 additions & 52 deletions src/momondo-scrappper.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,6 @@ function momondoScrappper() {
return 'http://www.momondo.co.uk/flightsearch/?' + momondo.toString();
}

/**
 * Takes a screenshot through the shared `driver` and writes it to disk as a
 * PNG file under the `screenshots` directory.
 *
 * The file name is built as
 * `<now>_<route.from>_<route.to>_<targetDate>.png`, where `<now>` is the value
 * returned by `Utils.getTodayDateString('DD-MM-YYYY_HH_mm')`.
 *
 * @param {Object} route - Object whose `from` and `to` properties are placed in the file name.
 * @param {*} targetDate - Value concatenated into the file name as-is (plain string coercion).
 *   NOTE(review): callers in this file pass a third `dateFormat` argument and the same date
 *   value that `retrieveFlightPage` calls `.format()` on; neither is used for formatting
 *   here - confirm whether a formatted date was intended.
 * @returns {Promise} Promise chained on `driver.takeScreenshot()`; the callback returns nothing,
 *   and an exception thrown by `fs.writeFileSync` rejects the returned promise.
 */
function takeScreenShot(route, targetDate) {
return driver.takeScreenshot().then((data) => {
// Timestamp prefix for the file name, in day-month-year_hour_minute layout.
let todayDate = Utils.getTodayDateString('DD-MM-YYYY_HH_mm');
let imgName = todayDate + '_' + route.from + '_' + route.to + '_' + targetDate + '.png';
// Relative directory; nothing in this function creates it, so it presumably must already exist.
let ssPath = 'screenshots' + path.sep;
// `data` is written using the 'base64' encoding (presumably the base64 PNG returned by the driver).
fs.writeFileSync(ssPath + imgName, data, 'base64');
debug('Screenshot saved at ' + ssPath + imgName + ' !');
});
}

function filterSucessfullPromises(promisesMap) {
var results = [];
for (let p of promisesMap) {
Expand Down Expand Up @@ -134,38 +124,57 @@ function momondoScrappper() {
});
}

function retrieveFlightPage(route, targetDate, dateFormat, currency, directFlight, maximize, timeout) {
let fullUrl = buildUrl(route.from, route.to, targetDate.format(dateFormat), currency, directFlight);
function resizeWindow(maximize) {
if (maximize) {
driver.manage().window().maximize();
return driver.manage().window().maximize();
} else {
return Promise.resolve();
}
driver.get(fullUrl);
}

let inProgressPromise = driver.wait(() => {
return driver.findElement(By.id('searchProgressText')).getText().then((text) => {
return text === 'Search complete';
function retrieveFlightPage(route, targetDate, dateFormat, currency, directFlight, maximize, timeout) {
return resizeWindow(maximize).then(() => {
let fullUrl = buildUrl(route.from, route.to, targetDate.format(dateFormat), currency, directFlight);
let getPromise = driver.get(fullUrl);
return getPromise.then(() => {
let inProgressPromise = driver.wait(() => {
return driver.findElement(By.id('searchProgressText')).getText().then((text) => {
return text === 'Search complete';
});
}, timeout);
return inProgressPromise.then(() => {
return retrieveFlightData(route, targetDate, dateFormat);
});
});
}, timeout);

return inProgressPromise.then(() => {
return retrieveFlightData(route, targetDate, dateFormat);
});
}

function handleError(error) {
debug(error);
return Promise.resolve([]);
/**
 * Takes a screenshot through the shared `driver` and writes it to disk as a
 * PNG file under the `screenshots` directory.
 *
 * The file name is built as
 * `<now>_<route.from>_<route.to>_<date>.png`, where `<now>` is the value
 * returned by `Utils.getTodayDateString('DD-MM-YYYY_HH_mm')`.
 *
 * Callers in this file invoke `takeScreenShot(route, date, dateFormat)` with the
 * same date value that `retrieveFlightPage` calls `.format(dateFormat)` on. The
 * third argument used to be dropped and the date object was string-coerced into
 * the file name; it is now formatted with the requested format.
 *
 * @param {Object} route - Object whose `from` and `to` properties are placed in the file name.
 * @param {*} targetDate - Date of the search. A value exposing a `format()` method is
 *   formatted with `dateFormat`; any other value is concatenated as-is, exactly as before.
 * @param {string} [dateFormat] - Format handed to `targetDate.format()`. Falls back to
 *   'DD-MM-YYYY' when omitted, so two-argument calls keep working.
 * @returns {Promise} Promise chained on `driver.takeScreenshot()`; the callback returns nothing,
 *   and an exception thrown by `fs.writeFileSync` rejects the returned promise.
 */
function takeScreenShot(route, targetDate, dateFormat) {
    let dateLabel;
    if (targetDate && typeof targetDate.format === 'function') {
        // The format is caller-configurable and may contain path separators or other
        // characters that are not valid in a file name (e.g. 'DD/MM/YYYY'), so
        // collapse them into '-'.
        dateLabel = String(targetDate.format(dateFormat || 'DD-MM-YYYY')).replace(/[\\\/:*?"<>|\s]+/g, '-');
    } else {
        // Plain values (e.g. an already formatted string) keep the original coercion.
        dateLabel = '' + targetDate;
    }
    return driver.takeScreenshot().then((data) => {
        let todayDate = Utils.getTodayDateString('DD-MM-YYYY_HH_mm');
        let imgName = todayDate + '_' + route.from + '_' + route.to + '_' + dateLabel + '.png';
        // Relative directory; nothing in this function creates it.
        let ssPath = 'screenshots' + path.sep;
        // `data` is written using the 'base64' encoding.
        fs.writeFileSync(ssPath + imgName, data, 'base64');
        debug('Screenshot saved at ' + ssPath + imgName + ' !');
    });
}

function launchRetry(retries, error, route, date, dateFormat, currency, directFlight, maximize, timeout) {
if (retries > 0) {
retries--;
function scrapFlights(route, date, dateFormat, currency, directFlight, maximize, timeout, retries) {
return retrieveFlightPage(route, date, dateFormat, currency, directFlight, maximize, timeout).catch((error) => {
debug('Caught an error while trying to retrieve the flights');
debug(error);
debug('Retrying...');
return retrieveFlightPage(route, date, dateFormat, currency, directFlight, maximize, timeout).catch(handleError);
} else {
return handleError(error);
}
return takeScreenShot(route, date, dateFormat).then(() => {
debug('Retrying...');
return scrapFlights(route, date, dateFormat, currency, directFlight, maximize, timeout, retries - 1);
}).catch((err) => {
debug('Failed to take screenshot');
debug(err);
stopBrowser();
startBrowser();
debug('Retrying...');
return scrapFlights(route, date, dateFormat, currency, directFlight, maximize, timeout, retries - 1);
});
});
}

function scrap(args) {
Expand All @@ -175,26 +184,9 @@ function momondoScrappper() {
let currency = args.currency;
let directFlight = args.directFlight;
let maximize = args.maximize;
let timeout = args.timeout || 40000;
let timeout = args.timeout || 80000;
let retries = args.retries || 1;
try {
return retrieveFlightPage(route, date, dateFormat, currency, directFlight, maximize, timeout).catch((error) => {
return takeScreenShot(route, date, dateFormat).then(() => {
return launchRetry(retries, error, route, date, dateFormat, currency, directFlight, maximize, timeout);
}).catch((err) => {
debug(err);
debug('Driver crashed');
stopBrowser();
startBrowser();
return launchRetry(retries, error, route, date, dateFormat, currency, directFlight, maximize, timeout);
});

});
} catch (error) {
return takeScreenShot(route, date).then(() => {
return handleError(error);
});
}
return scrapFlights(route, date, dateFormat, currency, directFlight, maximize, timeout, retries);
}

return {
Expand All @@ -204,4 +196,4 @@ function momondoScrappper() {
};
}

module.exports = momondoScrappper();
module.exports = momondoScrappper();
2 changes: 1 addition & 1 deletion src/options.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ function getDefaultOptions() {
targetDate: Utils.getDefaultDateString(defaultDateFormat),
database: 'localhost:27017/flight-scrappper',
collection: 'flight-data',
timeout: 50000,
timeout: 80000,
browser: 'chrome',
chromedriverArgs: [],
maximize: false,
Expand Down
1 change: 1 addition & 0 deletions src/persistency-module.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ function persistencyModule() {
let insertion = db.collection(collection).insertMany(docs);
return insertion.then((res) => {
debug('Persisted ' + res.insertedIds.length + ' results');
debug(res.insertedIds);
db.close();
return res.insertedIds;
});
Expand Down
7 changes: 6 additions & 1 deletion src/utils.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
var Moment = require('moment');
const util = require('util');
var FlightTime = require('../src/flight').FlightTime;

module.exports = {
Expand All @@ -12,7 +13,11 @@ module.exports = {
},

prettifyObject(obj) {
return JSON.stringify(obj, null, 4);
return util.inspect(obj, {
depth: null,
colors: true,
breakLength: 90
});
},

isNumeric(str) {
Expand Down

0 comments on commit 765456b

Please sign in to comment.