I have a working script which looks something like this:
// Log in to a site with PhantomJS and print the contents of #ContentMain.
var page = require('webpage').create();

// Relay console.log output produced by the page to this script's console.
page.onConsoleMessage = function(msg) {
  console.log(msg);
};

page.open("http://www.any_website.com", function(status) {
  if (status !== "success") {
    // No other code path reaches phantom.exit() when the page fails to open,
    // so exit here instead of leaving the process running forever.
    console.log("Unable to open the login page (status: " + status + ")");
    phantom.exit(1);
    return;
  }

  // Fill in the login form and submit it.
  page.evaluate(function() {
    document.querySelector("input[name='MAIL_ADDRESS']").value = "any@mail.com";
    document.querySelector("input[name='PASSWORD']").value = "the_real_password";
    document.getElementsByName("LOGIN_FORM_SUBMIT")[0].click();
    console.log("Login submitted!");
  });

  // Fixed wait: 20 seconds after the click, read #ContentMain, print it and exit.
  window.setTimeout(function() {
    var ua = page.evaluate(function() {
      return document.getElementById('ContentMain').innerHTML;
    });
    console.log(ua);
    phantom.exit();
  }, 20000);
});
So far, so good.
But as you might see, I have implemented a fixed timeout of 20 seconds after the click on the login button. I want to get rid of this, and I want the script to close immediately after the login is done. I have been playing around for months now, but I wasn't able to find a solution without timing constraints, which would be far more elegant, efficient and robust.
Can somebody help with the adaptation of the code?
thanks
PS: More information about how JavaScript + PhantomJS work is welcome. I don't really know what I'm doing here, and I don't know if the second page.evaluate call makes sense.
PPS: Is there a delay function which waits until the site has fully loaded?
Edit 1:
Thank you for the comments. I can make "fully loaded" more precise: a defined string shall appear in the data. I tried a different approach: looping with setInterval and looking for a specific string in the HTML data.
This new code isn't working because the script hangs after step 1. I think that when I read out the page.content value, the whole PhantomJS processing stops, and if I read page.content too early, it never gets the latest data after the login.
The plan was just to poll the HTML data until I find a specific string which I know will appear when the site is loaded.
When I raise the interval to 5000 or higher, the script may work, presumably because page.content is read after the final data has appeared?! (Not sure, but that's my explanation.)
Any idea how to poll the html data without breaking/stopping the site download/processing?
// Polyfill: define String.prototype.includes only when the runtime lacks it.
if (!String.prototype.includes) {
  /**
   * Reports whether `search` occurs in this string at or after `start`.
   * A `start` that is not a number is treated as 0.
   */
  String.prototype.includes = function(search, start) {
    'use strict';
    var fromIndex = (typeof start === 'number') ? start : 0;
    // The needle cannot fit into what remains of the string.
    if (fromIndex + search.length > this.length) {
      return false;
    }
    return this.indexOf(search, fromIndex) !== -1;
  };
}
// PhantomJS page object and the flags shared with the step runner.
var page = require('webpage').create();
var testindex = 0;          // index into `steps` of the step to run next
var loadInProgress = false; // true between onLoadStarted and onLoadFinished
var delayedLoad = false;    // true while a step is waiting for its stop text

// Pass messages logged inside the page on to this script's console.
page.onConsoleMessage = function(message) {
  console.log(message);
};

// Raise the flag for as long as a page load is running.
page.onLoadStarted = function() {
  loadInProgress = true;
  console.log("load started");
};

// Lower the flag again once the load has completed.
page.onLoadFinished = function() {
  loadInProgress = false;
  console.log("load finished");
};
// Ordered list of actions; each entry is called with no arguments.
var steps = [
  // Step 0: load the login page.
  function loadLoginPage() {
    page.open("http://www.any_website.com");
  },

  // Step 1: enter the credentials and click the login button.
  function submitCredentials() {
    page.evaluate(function() {
      var mailField = document.querySelector("input[name='MAIL_ADDRESS']");
      var passwordField = document.querySelector("input[name='PASSWORD']");
      mailField.value = "real_name";
      passwordField.value = "real_password";
      var submitButton = document.getElementsByName("LOGIN_FORM_SUBMIT")[0];
      submitButton.click();
    });
  },

  // Step 2: take a screenshot, then log the content of #ContentMain
  // after the form has been submitted.
  function dumpContent() {
    page.render('out.png');
    page.evaluate(function() {
      var mainContent = document.getElementById('ContentMain');
      console.log(mainContent.innerHTML);
    });
  }
];
// Per-step stop text: after step i has run, the runner waits until
// page.content contains stepstop[i]. An empty string means "do not wait".
var stepstop = ["", "Stop Text at the End of the needed Data", ""];

// Step runner, invoked every 100 ms. While no page load is in progress it
// runs the current step once and then either advances right away (no stop
// text) or keeps polling page.content until the stop text shows up.
// NOTE: the polling has no upper bound; if the stop text never appears,
// the script keeps polling and never exits.
// Declared with `var` so the timer handle is not an implicit global.
var interval = setInterval(function() {
  if (!loadInProgress && typeof steps[testindex] === "function") {
    // delayedLoad is true when this step already ran and is only waiting
    // for its stop text, so the step must not be executed a second time.
    if (delayedLoad === false) {
      console.log("step " + testindex);
      steps[testindex]();
    }
    var stopText = stepstop[testindex];
    if (stopText !== "") {
      var html = page.content;
      var stopFound = html.includes(stopText);
      console.log("c " + stopText);
      console.log("e " + stopFound);
      if (stopFound) {
        console.log("step " + testindex + ": HTML stop found");
        delayedLoad = false;
        testindex++;
      } else {
        console.log("step " + testindex + ": HTML stop not found");
        delayedLoad = true;
      }
    } else {
      console.log("step " + testindex + ": no HTML stop search needed");
      testindex++;
    }
  }
  // No step left: stop the timer so this branch cannot fire again, then quit.
  if (typeof steps[testindex] !== "function") {
    console.log("shutdown phantom");
    clearInterval(interval);
    phantom.exit();
  }
}, 100);