How to run processing commands in parallel in QGIS

Yep, if you want to run a whole bunch of processing commands like “qgis:dissolve” (or whatever) in parallel, simply and easily, on your Ubuntu computer, you can! Hooray! Here’s how.

You’ll need GNU parallel installed, so type the following in BASH:

sudo apt-get -y install parallel

Then you’ll need a standalone QGIS processing wrapper which can run processing commands directly from BASH. Assuming you’re using Ubuntu, where the appropriate place for user-executable scripts is “/usr/local/bin/”, type the following at the BASH command line to create the script file.

sudo nano /usr/local/bin/SA_runalg.py

Now copy the following python code into nano then save and exit.

#!/usr/bin/python2.7
# Standalone wrapper that runs a single QGIS processing algorithm from the
# command line. Every command-line argument is forwarded, in order, to
# general.runalg():
#     SA_runalg.py <arg1> [<arg2> ...]
# All arguments reach runalg() as strings, exactly as they appear in sys.argv.
import qgis
from qgis.core import *
from qgis import utils
from qgis import *
from PyQt4.QtCore import *
import io, os, sys, collections, random, tempfile
from PyQt4.QtGui import *
# Reading argv[1] raises IndexError when the script is called without
# arguments. NOTE(review): `operation` itself is not used anywhere below.
operation = sys.argv[1]
# Create and initialise the QGIS application object before the processing
# framework is imported and initialised further down.
# NOTE(review): the positional True is presumably QgsApplication's
# GUI-enabled flag -- confirm against the QGIS API documentation.
app = QgsApplication([],True, None)
app.setPrefixPath("/usr", True)
app.initQgis()
# Extend the module search path before the `processing` imports that follow
# (presumably the directory that holds the processing plugin -- verify for
# your installation).
sys.path.append('/usr/share/qgis/python/plugins')
from processing.core.Processing import Processing
Processing.initialize()
# NOTE(review): `general`, used below, is presumably provided by this
# wildcard import -- confirm.
from processing.tools import *
# Everything after the script name is passed positionally to runalg().
alg_args = sys.argv[1:]
general.runalg(*alg_args)
# Shut QGIS down once the algorithm call has returned.
app.exitQgis()
app.exit()

Thanks to all the people who helped with questions and answers on Stack Exchange, which made this script possible!

Now make it executable with

sudo chmod +x /usr/local/bin/SA_runalg.py

One final thing:
You could use this script by itself from BASH, which is cool, and you could even use GNU parallel to run it in parallel from BASH, which is even cooler. But my aim is to run processing commands in parallel from within pyQGIS, which would be supa cool.

In order to access the BASH standalone QGIS processing script in parallel from within pyQGIS we need a small python wrapper to take the list of commands and pass them through to parallel.

So inside your python QGIS script add the following function

def parallel_runalg(processing_command_lists):
    """Run SA_runalg.py processing commands concurrently via GNU parallel.

    Each entry of ``processing_command_lists`` is one argument string for the
    ``SA_runalg.py`` wrapper, for example
    ``"'qgis:dissolve' in.shp True None out.shp"``.  Every entry is prefixed
    with ``SA_runalg.py`` and the resulting command lines are piped to the
    ``parallel`` executable on its standard input, one command per line.

    The command lines are sent through a pipe instead of fixed-name files in
    the shared temp directory, so simultaneous calls cannot overwrite each
    other's command list and nothing is left behind on disk.

    Args:
        processing_command_lists: iterable of argument strings.  The strings
            are interpreted by a shell, so any quoting must already be
            present in them.

    Returns:
        None.  The captured standard output and standard error of
        ``parallel`` are printed.  Nothing is run or printed when the
        iterable is empty.
    """
    import subprocess

    command_lines = ["SA_runalg.py " + command_string + "\n"
                     for command_string in processing_command_lists]
    if not command_lines:
        # Nothing to do; do not spawn a process for an empty job list.
        return

    try:
        # With no command template, parallel executes each input line as a
        # command.  universal_newlines=True gives text-mode pipes so the
        # same code works with str on Python 2.7 and Python 3.
        p = subprocess.Popen(["parallel"],
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True)
    except OSError as exc:
        # Keep the report-and-continue behaviour of this helper: say why
        # nothing ran instead of raising into the caller.
        print("Could not start GNU parallel: %s" % exc)
        return

    out, err = p.communicate("".join(command_lines))
    print(out)
    print(err)

Now you can run a list of processing commands in parallel from within QGIS. Yay you!

Here’s a small piece of code to test it out. Don’t forget to listen to your CPU fan fire up as all the cores are brought to bear!

import random, tempfile, os

# Create eight test shapefiles (1example.shp ... 8example.shp) in the temp
# directory. Each one holds 900 randomly placed 1x1 squares whose lower-left
# corners lie between 0 and 30 on both axes (EPSG:4326).
files_to_dissolve = []
for file_number in range(1, 9):
    filename = os.path.join(tempfile.gettempdir(), str(file_number) + "example.shp")
    files_to_dissolve.append(filename)
    bboxlayer = QgsVectorLayer("Polygon?crs=epsg:4326&field=id:integer&field=name:string(20)&index=yes", "bbox_polygon", "memory")
    # Entering edit mode once per layer is enough; it does not have to be
    # repeated for every feature.
    bboxlayer.startEditing()
    bbox_features = []
    # The counter is unused, and naming it "_" keeps it from shadowing the
    # outer file counter.
    for _ in range(900):
        xseed = random.uniform(0, 30)
        yseed = random.uniform(0, 30)
        bbox_feature = QgsFeature()
        # One ring with the four corners of the square.
        bbox_feature.setGeometry(QgsGeometry.fromPolygon([[
            QgsPoint(xseed, yseed),
            QgsPoint(xseed, yseed + 1),
            QgsPoint(xseed + 1, yseed + 1),
            QgsPoint(xseed + 1, yseed),
        ]]))
        bbox_features.append(bbox_feature)
    bboxlayer.dataProvider().addFeatures(bbox_features)
    bboxlayer.commitChanges()
    QgsMapLayerRegistry.instance().addMapLayer(bboxlayer)
    write_error = QgsVectorFileWriter.writeAsVectorFormat(
        bboxlayer,
        filename,
        "system",
        QgsCoordinateReferenceSystem(4326),
        "ESRI Shapefile")
    # Report a failed export instead of silently handing a missing file to
    # the dissolve step.
    if write_error != QgsVectorFileWriter.NoError:
        print("Could not write %s" % filename)
    
# Build one 'qgis:dissolve' command per input shapefile. The result is
# written next to the input, with "D" prefixed to the file name.
# NOTE: the paths are concatenated into the command unquoted, so they must
# not contain spaces or other shell-special characters.
commands_list = []
output_files = []
for input_file in files_to_dissolve:
    output_file = os.path.join(os.path.dirname(input_file), "D" + os.path.basename(input_file))
    output_files.append(output_file)
    processing_command = "'qgis:dissolve' " + input_file + " True" + " None " + output_file
    commands_list.append(processing_command)

# Run all dissolve commands in parallel.
parallel_runalg(commands_list)

# Load every dissolved result into the map, and report the ones that are
# missing or unreadable so a failed run does not go unnoticed.
for output_file in output_files:
    layer = QgsVectorLayer(output_file, output_file, "ogr")
    if not layer.isValid():
        print("Dissolved layer failed to load: %s" % output_file)
        continue
    QgsMapLayerRegistry.instance().addMapLayer(layer)

Leave a Reply

Your email address will not be published. Required fields are marked *


4 + = nine