From f707ddba9cf0aa235d2443c4cb5a9c8970e6978c Mon Sep 17 00:00:00 2001 From: csrichter Date: Fri, 28 Apr 2017 14:15:45 +0200 Subject: [PATCH] added caching, moved table read to other thread (-> faster startup) --- python/tmc_classes.py | 99 ++++++++++++++++++++++++++++++------------- 1 file changed, 70 insertions(+), 29 deletions(-) diff --git a/python/tmc_classes.py b/python/tmc_classes.py index 74e07b3..796ccfb 100644 --- a/python/tmc_classes.py +++ b/python/tmc_classes.py @@ -30,13 +30,22 @@ #rename to common.py? from bitstring import BitArray import copy,csv,code +import hashlib,os,time +import cPickle as pickle#faster for python2 + from collections import namedtuple #Street = namedtuple("Street", "name lcn") #Street(lcn=12,name="test") #Street(name='test', lcn=12) #Street(lcn=12,name="test").name - +import threading +#thr = threading.Thread(target=foo, args=(), kwargs={}) +#thr.start() # will run "foo" +#.... +#thr.is_alive() # will return whether foo is running currently +#.... +#thr.join() # will wait till "foo" is done language="de"#currently supported: de, en (both partially) @@ -53,25 +62,36 @@ def ordinal(num): return str(num) + suffix class lcl: def __init__(self,lcldir): + self.init_done=False + self.thr = threading.Thread(target=self.readtables, args=[lcldir], kwargs={}) + self.thr.start() + def readtables(self,lcldir): + if os.path.exists(lcldir) and not os.path.exists(lcldir+"/cache"): + os.makedirs(lcldir+"/cache") try: - #self.points= self.dat_to_dict(lcldir+'POINTS.DAT','ISO 8859-15','LCD') - #self.poffsets= self.dat_to_dict(lcldir+'POFFSETS.DAT','ISO 8859-15','LCD') - self.names= self.dat_to_dict(lcldir+'NAMES.DAT','ISO 8859-15','NID') - #self.roads=self.dat_to_dict(lcldir+'ROADS.DAT','ISO 8859-15','LCD') - #self.segments=self.dat_to_dict(lcldir+'SEGMENTS.DAT','ISO 8859-15','LCD') - #self.allocated_codes=self.dat_to_dict(lcldir+'LOCATIONCODES.DAT','ISO 8859-15','LCD') - #self.areas=self.dat_to_dict(lcldir+'ADMINISTRATIVEAREA.DAT','ISO 8859-15','LCD') - self.allocated_codes=self.dat_to_tuple_dict(lcldir+'LOCATIONCODES.DAT','ISO 8859-15','Code') - self.points= self.dat_to_tuple_dict(lcldir+'POINTS.DAT','ISO 8859-15','Point') - self.poffsets= self.dat_to_tuple_dict(lcldir+'POFFSETS.DAT','ISO 8859-15','POffset') - self.roads=self.dat_to_tuple_dict(lcldir+'ROADS.DAT','ISO 8859-15','Road') - self.segments=self.dat_to_tuple_dict(lcldir+'SEGMENTS.DAT','ISO 8859-15','Segment') - self.areas=self.dat_to_tuple_dict(lcldir+'ADMINISTRATIVEAREA.DAT','ISO 8859-15','Area') - #code.interact(local=locals()) + self.start=time.time() + #print("start\t"+str(time.time()-self.start)) + #self.points= self.dat_to_dict(lcldir,'POINTS.DAT','ISO 8859-15','LCD') + #self.poffsets= self.dat_to_dict(lcldir,'POFFSETS.DAT','ISO 8859-15','LCD') + self.names= self.dat_to_dict(lcldir,'NAMES.DAT','ISO 8859-15','NID') + #self.segments=self.dat_to_dict(lcldir,'SEGMENTS.DAT','ISO 8859-15','LCD') + #self.allocated_codes=self.dat_to_dict(lcldir,'LOCATIONCODES.DAT','ISO 8859-15','LCD') + #self.areas=self.dat_to_dict(lcldir,'ADMINISTRATIVEAREA.DAT','ISO 8859-15','LCD') + self.allocated_codes=self.dat_to_tuple_dict(lcldir,'LOCATIONCODES.DAT','ISO 8859-15','Code') + self.points= self.dat_to_tuple_dict(lcldir,'POINTS.DAT','ISO 8859-15','Point') + self.poffsets= self.dat_to_tuple_dict(lcldir,'POFFSETS.DAT','ISO 8859-15','POffset') + self.roads=self.dat_to_tuple_dict(lcldir,'ROADS.DAT','ISO 8859-15','Road') + self.segments=self.dat_to_tuple_dict(lcldir,'SEGMENTS.DAT','ISO 8859-15','Segment') + self.areas=self.dat_to_tuple_dict(lcldir,'ADMINISTRATIVEAREA.DAT','ISO 8859-15','Area') + self.init_done=True + + print("lcl time:\t"+str(time.time()-self.start)) + # code.interact(local=locals()) except IOError as e: print(e) print("location table not found") def lcn_allocated(self,LCN): + self.thr.join() if self.allocated_codes.has_key(LCN): return self.allocated_codes[LCN].ALLOCATED==u"1" else: @@ -91,20 +111,40 @@ class lcl: return self.points[LCD] def get_name(self,NID): return self.names[NID]["NAME"] - def dat_to_tuple_dict(self,filename,encoding,tuple_name): - csv_reader = csv.reader(open(filename), delimiter=';', quotechar='"') - header=csv_reader.next() - ret_dict={} - tupleClass=namedtuple(tuple_name," ".join(header)) - for row in csv_reader: - # decode ISO 8859-15 back to Unicode, cell by cell: #TODO read encoding from README.DAT - unirow=[unicode(cell, encoding) for cell in row] - linetuple=tupleClass(*unirow)# "*" unpacks the list - lcn=int(linetuple.LCD) - ret_dict[lcn]=linetuple - return ret_dict - def dat_to_dict(self,filename,encoding,id_col_name): - csv_reader = csv.reader(open(filename), delimiter=';', quotechar='"') + def dat_to_tuple_dict(self,lcldir,filename,encoding,tuple_name): + cache_version_str="v1.0" + md5_hash=hashlib.md5(open(lcldir+filename, 'rb').read()+cache_version_str).hexdigest() + if os.path.exists(lcldir+"/cache/"+md5_hash) and os.path.exists(lcldir+"/cache/"+md5_hash+"_header"):#return cached table + #code.interact(local=locals()) + #print("unpickle_start\t"+str(time.time()-self.start)) + header=pickle.load( open(lcldir+"/cache/"+md5_hash+"_header", "rb" ) ) + tupleClass=namedtuple(tuple_name," ".join(header)) + globals()[tuple_name] = tupleClass#so that pickle finds the dynamic class + ret_dict=pickle.load( open(lcldir+"/cache/"+md5_hash, "rb" ) ) + #print("unpickle_end\t"+str(time.time()-self.start)) + return ret_dict + else:#read DAT file and cache result + #print("read_start\t"+str(time.time()-self.start)) + csv_reader = csv.reader(open(lcldir+filename), delimiter=';', quotechar='"') + header=csv_reader.next() + ret_dict={} + tupleClass=namedtuple(tuple_name," ".join(header)) + for row in csv_reader: + # decode ISO 8859-15 back to Unicode, cell by cell: #TODO read encoding from README.DAT + unirow=[unicode(cell, encoding) for cell in row] + linetuple=tupleClass(*unirow)# "*" unpacks the list + lcn=int(linetuple.LCD) + ret_dict[lcn]=linetuple + #code.interact(local=locals()) + #print("pickle_start\t"+str(time.time()-self.start)) + globals()[tuple_name] = tupleClass#so that pickle finds the dynamic class + pickle.dump( ret_dict, open( lcldir+"/cache/"+md5_hash, "wb" ) ) + pickle.dump( header, open( lcldir+"/cache/"+md5_hash+"_header", "wb" ) ) + #print("pickle_end\t"+str(time.time()-self.start)) + return ret_dict + def dat_to_dict(self,lcldir,filename,encoding,id_col_name): + #print("datbegin\t"+str(time.time()-self.start)) + csv_reader = csv.reader(open(lcldir+filename), delimiter=';', quotechar='"') header=csv_reader.next() ret_dict={} for row in csv_reader: @@ -113,6 +153,7 @@ class lcl: linedict=dict(zip(header,unirow)) id_num=int(linedict[id_col_name]) ret_dict[id_num]=linedict + #print("datend\t"+str(time.time()-self.start)) return ret_dict class tmc_event: def __init__(self,ecn,tableobj):