Hmm, I've been chopping it up and trying a bunch of things, so the code is super ugly now (basically, I thought I might be hitting some kind of file lock, so I literally split everything up to try to ensure that nothing is reading from or writing to the same variable).
It looks something like this at the moment. It does run; it just doesn't seem to show any reduction in processing time. The other thing to note is that this is (currently) running on an instanced node, of which there are hundreds. The cumulative processing time for seperation() is ~10ms, and when it is split into 4 threads each one drops to under ~3ms, but they still add up to the same total per frame:
# Per-worker accumulators: each seperation_N() worker writes only to its own
# finalsep_N, so no two threads share an output variable.
# Vector3() default-constructs the zero vector (0, 0, 0).
var finalsep_1 = Vector3()
var finalsep_2 = Vector3()
var finalsep_3 = Vector3()
var finalsep_4 = Vector3()

# One distance value per worker.
# NOTE(review): these are not read anywhere in the visible snippet; presumably
# they are used inside the elided "expensive code" of each worker - confirm.
var boiddist = 2
var boiddist2 = 2
var boiddist3 = 2
var boiddist4 = 2

# One Thread object per worker, created once when the node is ready and
# restarted every frame from _process().
onready var thread1 = Thread.new()
onready var thread2 = Thread.new()
onready var thread3 = Thread.new()
onready var thread4 = Thread.new()
# Per-frame update: splits boid_list into four contiguous, non-overlapping
# chunks (bl_1..bl_4), runs one seperation_N() worker thread per chunk, waits
# for all of them, and averages their partial sums into flockSeperation.
#
# delta: frame time in seconds (unused in the visible part of this function).
func _process(delta):
	# ...
	# other parts of code
	# ...
	bl_1 = []
	bl_2 = []
	bl_3 = []
	bl_4 = []
	var boid_count = boid_list.size()
	if boid_count == 0:
		# No boids to separate from: skip the thread start-up entirely and avoid
		# dividing by zero below.
		flockSeperation = Vector3(0.0, 0.0, 0.0)
	else:
		# Chunk k covers the half-open index range [bound_k, bound_k+1).
		# Integer division keeps the chunk sizes within one element of each other.
		var bound_1 = boid_count / 4
		var bound_2 = boid_count * 2 / 4
		var bound_3 = boid_count * 3 / 4
		# Godot 3's Array.slice() treats BOTH indices as inclusive, so the end
		# index is "next bound - 1". Passing the next bound itself would put the
		# boundary boid in two chunks and count it twice. A chunk whose range is
		# empty must not be sliced at all: an end index of -1 would wrap around
		# to the last element.
		bl_1 = boid_list.slice(0, bound_1 - 1) if bound_1 > 0 else []
		bl_2 = boid_list.slice(bound_1, bound_2 - 1) if bound_2 > bound_1 else []
		bl_3 = boid_list.slice(bound_2, bound_3 - 1) if bound_3 > bound_2 else []
		# bound_3 < boid_count always holds here, so the last chunk is never empty.
		bl_4 = boid_list.slice(bound_3, boid_count - 1)

		# Start all four workers before waiting on any of them so they can overlap.
		# NOTE(review): starting and joining four threads every frame on each of
		# the hundreds of instances has a fixed cost per start/join. That overhead
		# may be why the total frame time does not drop - confirm by profiling,
		# and consider long-lived worker threads or batching across instances.
		thread1.start(self, "seperation_1", null)
		thread2.start(self, "seperation_2", null)
		thread3.start(self, "seperation_3", null)
		thread4.start(self, "seperation_4", null)

		# Block until every worker is done; the wait is bounded by the slowest one.
		thread1.wait_to_finish()
		thread2.wait_to_finish()
		thread3.wait_to_finish()
		thread4.wait_to_finish()

		# Combine the four partial sums and average over all boids.
		flockSeperation = ((finalsep_1 + finalsep_2 + finalsep_3 + finalsep_4) / boid_count)
# There are now four of these, each numbered (seperation_1 .. seperation_4).
# Thread entry point for the first chunk: _process() starts it on thread1 with
# null as the userdata argument. It resets finalsep_1, walks bl_1, and adds one
# contribution to finalsep_1 for every entry that passes the position check.
# It writes only to finalsep_1, and only reads bl_1.
#
# x: the userdata passed by Thread.start(); _process() passes null.
func seperation_1(x):
# Start from a zero sum on every run so results from the previous frame do not leak in.
finalsep_1 = Vector3(0.0,0.0,0.0)
# NOTE(review): the loop variable reuses the name of the parameter `x`.
for x in bl_1:
# Skip the case where the other boid sits at exactly this node's own global position.
# NOTE(review): `boidpos` is not defined in the visible snippet; presumably it is
# derived from `x` in the elided code - confirm.
if self.global_transform.origin != boidpos:
# The next line is a prose placeholder from the original post, not GDScript:
# it stands in for the real (elided) computation that produces `output`.
expensive code runs here then:
finalsep_1 += output