Untrusted, unstable, memory leaking third-party sub-process

时间:2015-10-06 08:18:18

标签: java multithreading selenium-webdriver phantomjs

I have a small Java program that works through a queue of URLs and takes a screenshot of each website.

To achieve this, I am using Selenium with phantomjsdriver (and a phantomjs2 binary)

<dependency>
   <groupId>org.seleniumhq.selenium</groupId>
   <artifactId>selenium-server</artifactId>
   <version>2.44.0</version>
</dependency>
<dependency>
   <groupId>com.github.detro.ghostdriver</groupId>
   <artifactId>phantomjsdriver</artifactId>
   <version>1.1.0</version>
</dependency>

I am using a ThreadPoolExecutor to work on each URL and get an idle PhantomJS instance from a ResourcePool.

The problem is that PhantomJS sometimes doesn't seem to handle errors from the remote website correctly, which causes that instance to block. It does not take long until all instances in the resource pool are in a blocked / unresponsive state.

I need help ensuring that PhantomJS threads have a maximum lifetime, and finding a way to reliably shut them down after a specific number of screenshots has been taken or when they do not respond within a defined time.

Simplified code

// Worker pool that runs the screenshot tasks; threads above the core size are
// kept alive for 10 minutes while idle.
private ThreadPoolExecutor threadPoolExecutor =
        new ThreadPoolExecutor(corePoolSize, maxPoolSize, 10, TimeUnit.MINUTES, workerQueue);

// Shared pool of PhantomJS-backed services handed to every task.
// NOTE(review): the SeleniumResourcePool listing in this file only declares a
// (int, Boolean) constructor; the third argument presumably belongs to the
// non-simplified version of the class - confirm.
private SeleniumResourcePool seleniumResourcePool =
        new SeleniumResourcePool(cores, true, PhantomService.class);

/**
 * Starts all core worker threads, fetches one batch of messages from the
 * queue and submits a {@code ScreenshotItemTask} to the executor for each
 * message in it.
 */
public void run() {
    threadPoolExecutor.prestartAllCoreThreads();

    // The batch is fetched once; every message in it becomes its own task.
    for (Message queued : awsSqsUtil.getMessagesFromQueue(awsQueueUrl, 1)) {
        logger.debug("Feeding new ScreenshotItemTask");
        ScreenshotItemTask task = new ScreenshotItemTask(queued, awsQueueUrl, seleniumResourcePool);
        threadPoolExecutor.execute(task);
    }
}

SeleniumResourcePool.java

package com.opendi.util.selenium;

import com.opendi.util.selenium.service.PhantomService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.locks.ReentrantLock;

/**
 * Bounded pool of {@link PhantomService} instances.
 *
 * <p>At most {@code size} instances are accounted for at any time. Depending on
 * the {@code dynamicCreation} constructor flag they are either all created up
 * front or created on demand by {@link #acquire()}. Callers hand instances back
 * through {@link #recycle(PhantomService)}.
 *
 * <p>{@link #createdObjects} is only read or written while {@link #lock} is
 * held, and the lock is never held while an instance is being created or
 * destroyed, so a slow PhantomJS start-up does not stall other callers.
 *
 * <p>Note: the pool cannot reclaim an instance that is never recycled (for
 * example because the task using it is blocked inside a driver call); callers
 * need their own time-out for that.
 */
public class SeleniumResourcePool {
    protected Logger logger = LoggerFactory.getLogger(SeleniumResourcePool.class);
    protected final BlockingQueue<PhantomService> pool;
    /** Guards {@link #createdObjects}. */
    protected final ReentrantLock lock = new ReentrantLock();
    /** Number of pool slots currently backed (or about to be backed) by an instance. */
    protected int createdObjects = 0;
    protected int size;
    /** {@code true}: instances are created lazily by {@link #acquire()}. */
    private final boolean dynamicCreation;

    /**
     * Creates the pool.
     *
     * @param size            maximum number of instances; also the queue capacity
     * @param dynamicCreation {@code true} to create instances on demand,
     *                        {@code false} to create all of them immediately
     */
    public SeleniumResourcePool(int size, Boolean dynamicCreation) {
        pool = new ArrayBlockingQueue<>(size, true);
        this.size = size;
        this.dynamicCreation = dynamicCreation;
        logger.info("Spawning a pool of {} Selenium instances", size);

        try {
            createPool();
        } catch (IOException e) {
            // Slots that could not be filled stay unreserved, so acquire()
            // will try to create the missing instances later.
            logger.error("Could not pre-create all Selenium instances", e);
        }
    }


    /**
     * Creates a new service instance.
     *
     * @return a freshly constructed {@link PhantomService}
     * @throws IOException if the service cannot be created
     */
    protected PhantomService createInstance() throws IOException {
        return new PhantomService();
    }

    /**
     * Returns an instance to the pool. An instance whose lifetime counter
     * exceeds 10 is destroyed and replaced by a new one.
     *
     * <p>This is only reached when a task actually hands its instance back; an
     * instance stuck inside a driver call never arrives here.
     *
     * @param service the instance previously obtained from {@link #acquire()}
     * @throws IOException if the replacement instance cannot be created; the
     *                     slot is released so that a later {@link #acquire()}
     *                     can create a new instance
     */
    public void recycle(PhantomService service) throws IOException {
        PhantomService returned = service;
        if (service.getLifetime() > 10) {
            logger.debug("Shutting down selenium service");
            try {
                service.destroy();
            } catch (RuntimeException e) {
                // A failing shutdown must not cost the pool a slot.
                logger.warn("Selenium service did not shut down cleanly; replacing it anyway", e);
            }
            try {
                returned = createInstance();
            } catch (IOException | RuntimeException e) {
                releaseSlot();
                throw e;
            }
        }
        pool.add(returned);
    }

    /**
     * Obtains an instance. A new one is created while fewer than {@code size}
     * slots are in use; otherwise the call blocks until an instance is
     * recycled.
     *
     * @return an instance for exclusive use by the caller
     * @throws IOException          if a new instance cannot be created
     * @throws InterruptedException if interrupted while waiting for an instance
     */
    public PhantomService acquire() throws IOException, InterruptedException {
        if (reserveSlot()) {
            try {
                return createInstance();
            } catch (IOException | RuntimeException e) {
                // Give the slot back, otherwise the pool shrinks on every failure.
                releaseSlot();
                throw e;
            }
        }
        return pool.take();
    }

    /**
     * Fills every free slot with a new instance. Does nothing when the pool
     * was configured for dynamic creation.
     *
     * @throws IOException if an instance cannot be created; instances created
     *                     before the failure remain in the pool
     */
    public void createPool() throws IOException {
        if (dynamicCreation) {
            return;
        }
        while (reserveSlot()) {
            PhantomService created;
            try {
                created = createInstance();
            } catch (IOException | RuntimeException e) {
                releaseSlot();
                throw e;
            }
            pool.add(created);
        }
    }

    /** Claims one pool slot if any is free; returns whether a slot was claimed. */
    private boolean reserveSlot() {
        lock.lock();
        try {
            if (createdObjects < size) {
                ++createdObjects;
                return true;
            }
            return false;
        } finally {
            lock.unlock();
        }
    }

    /** Gives back a slot claimed by {@link #reserveSlot()} whose instance no longer exists. */
    private void releaseSlot() {
        lock.lock();
        try {
            if (createdObjects > 0) {
                --createdObjects;
            }
        } finally {
            lock.unlock();
        }
    }
}

PhantomService.java

package com.opendi.util.selenium.service;

import org.openqa.selenium.Dimension;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.phantomjs.PhantomJSDriver;
import org.openqa.selenium.phantomjs.PhantomJSDriverService;
import org.openqa.selenium.remote.DesiredCapabilities;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;

/**
 * Wraps one PhantomJS driver service and the {@link PhantomJSDriver} attached
 * to it.
 *
 * <p>The service is started in the constructor; the driver is created lazily
 * on the first call to {@link #getDriver()}. {@link #destroy()} tears both down.
 */
public class PhantomService {
    protected Logger logger = LoggerFactory.getLogger(PhantomService.class);
    protected PhantomJSDriver driver;
    protected PhantomJSDriverService service;
    /** Use counter; starts at 1 and is advanced by every {@link #getLifetime()} call. */
    protected int lifetime = 1;

    /**
     * Builds a driver service for the bundled PhantomJS executable on any free
     * port and starts it.
     *
     * @throws IOException if the service cannot be started
     */
    public PhantomService() throws IOException {
        // Turn off java.util.logging output of the driver service class.
        java.util.logging.Logger.getLogger(PhantomJSDriverService.class.getName()).setLevel(Level.OFF);
        service = new PhantomJSDriverService.Builder()
                .usingPhantomJSExecutable(new File("bin/amd64/phantomjs"))
                .usingAnyFreePort()
                .build();

        service.start();
    }

    /**
     * Returns the driver, creating and configuring it on first use: 1024x768
     * window, 60 s script and page-load time-outs, 5 s implicit wait.
     *
     * @return the driver bound to this instance's service
     */
    public PhantomJSDriver getDriver() {
        if (driver == null) {
            DesiredCapabilities capabilities = DesiredCapabilities.phantomjs();
            capabilities.setJavascriptEnabled(true);
            // NOTE(review): the service process was already started in the
            // constructor, so these CLI arguments may not reach PhantomJS when
            // passed as a capability - confirm; they may need to be set on the
            // service builder instead.
            capabilities.setCapability(PhantomJSDriverService.PHANTOMJS_CLI_ARGS, new String[] {
                    "--web-security=false",
                    "--ignore-ssl-errors=true",
                    "--ssl-protocol=any",
                    "--webdriver-loglevel=NONE"
            });


            driver = new PhantomJSDriver(service, capabilities);

            driver.manage().window().setSize(new Dimension(1024, 768));
            driver.manage().timeouts().setScriptTimeout(60, TimeUnit.SECONDS);
            driver.manage().timeouts().pageLoadTimeout(60, TimeUnit.SECONDS);
            driver.manage().timeouts().implicitlyWait(5, TimeUnit.SECONDS);
        }
        return driver;
    }

    /**
     * Returns the current use counter and then increments it, so each call
     * counts as one use of this instance.
     *
     * @return the counter value before the increment
     */
    public int getLifetime() {
        logger.debug("Checking lifetime: {}", lifetime);
        return lifetime++;
    }

    /**
     * Shuts down the driver (if one was created) and stops the service.
     *
     * <p>Only {@code quit()} is called: it closes every window, so a preceding
     * {@code close()} is redundant. A failure while quitting the driver is
     * logged and does not prevent the service process from being stopped.
     */
    public void destroy() {
        try {
            if (driver != null) {
                driver.quit();
            }
        } catch (RuntimeException e) {
            logger.warn("PhantomJS driver did not quit cleanly; stopping the service anyway", e);
        } finally {
            // Drop the dead driver so it is never handed out again.
            driver = null;
            if (service != null && service.isRunning()) {
                service.stop();
            }
        }
    }
}

0 个答案:

没有答案