Added on-disk caching of the parsed location tables and moved table reading to a background thread (-> faster startup)

9 years ago · f707ddba9c
1 changed file with 70 additions and 29 deletions
--- a/python/tmc_classes.py
+++ b/python/tmc_classes.py
@ -30,13 +30,22 @@
 #rename to common.py?
 from bitstring import BitArray
 import copy,csv,code
+import hashlib,os,time
+import cPickle as pickle#faster for python2
+

 from collections import namedtuple
 #Street = namedtuple("Street", "name lcn")
 #Street(lcn=12,name="test")
 #Street(name='test', lcn=12)
 #Street(lcn=12,name="test").name
-
+import threading
+#thr = threading.Thread(target=foo, args=(), kwargs={})
+#thr.start() # will run "foo"
+#....
+#thr.is_alive() # will return whether foo is running currently
+#....
+#thr.join() # will wait till "foo" is done


 language="de"#currently supported: de, en (both partially)
@ -53,25 +62,36 @@ def ordinal(num):
    return str(num) + suffix
 class lcl:
    def __init__(self,lcldir):
+      self.init_done=False
+      self.thr = threading.Thread(target=self.readtables, args=[lcldir], kwargs={})
+      self.thr.start()
+    def readtables(self,lcldir):
+      if os.path.exists(lcldir) and not os.path.exists(lcldir+"/cache"):
+        os.makedirs(lcldir+"/cache")
      try:
-        #self.points= self.dat_to_dict(lcldir+'POINTS.DAT','ISO 8859-15','LCD')
-        #self.poffsets= self.dat_to_dict(lcldir+'POFFSETS.DAT','ISO 8859-15','LCD')
-        self.names= self.dat_to_dict(lcldir+'NAMES.DAT','ISO 8859-15','NID')
-        #self.roads=self.dat_to_dict(lcldir+'ROADS.DAT','ISO 8859-15','LCD')
-        #self.segments=self.dat_to_dict(lcldir+'SEGMENTS.DAT','ISO 8859-15','LCD')
-        #self.allocated_codes=self.dat_to_dict(lcldir+'LOCATIONCODES.DAT','ISO 8859-15','LCD')
-        #self.areas=self.dat_to_dict(lcldir+'ADMINISTRATIVEAREA.DAT','ISO 8859-15','LCD')
-        self.allocated_codes=self.dat_to_tuple_dict(lcldir+'LOCATIONCODES.DAT','ISO 8859-15','Code')
-        self.points= self.dat_to_tuple_dict(lcldir+'POINTS.DAT','ISO 8859-15','Point')
-        self.poffsets= self.dat_to_tuple_dict(lcldir+'POFFSETS.DAT','ISO 8859-15','POffset')
-        self.roads=self.dat_to_tuple_dict(lcldir+'ROADS.DAT','ISO 8859-15','Road')
-        self.segments=self.dat_to_tuple_dict(lcldir+'SEGMENTS.DAT','ISO 8859-15','Segment')
-        self.areas=self.dat_to_tuple_dict(lcldir+'ADMINISTRATIVEAREA.DAT','ISO 8859-15','Area')
-        #code.interact(local=locals())
+        self.start=time.time()
+        #print("start\t"+str(time.time()-self.start))
+        #self.points= self.dat_to_dict(lcldir,'POINTS.DAT','ISO 8859-15','LCD')
+        #self.poffsets= self.dat_to_dict(lcldir,'POFFSETS.DAT','ISO 8859-15','LCD')
+        self.names= self.dat_to_dict(lcldir,'NAMES.DAT','ISO 8859-15','NID')
+        #self.segments=self.dat_to_dict(lcldir,'SEGMENTS.DAT','ISO 8859-15','LCD')
+        #self.allocated_codes=self.dat_to_dict(lcldir,'LOCATIONCODES.DAT','ISO 8859-15','LCD')
+        #self.areas=self.dat_to_dict(lcldir,'ADMINISTRATIVEAREA.DAT','ISO 8859-15','LCD')
+        self.allocated_codes=self.dat_to_tuple_dict(lcldir,'LOCATIONCODES.DAT','ISO 8859-15','Code')
+        self.points= self.dat_to_tuple_dict(lcldir,'POINTS.DAT','ISO 8859-15','Point')
+        self.poffsets= self.dat_to_tuple_dict(lcldir,'POFFSETS.DAT','ISO 8859-15','POffset')
+        self.roads=self.dat_to_tuple_dict(lcldir,'ROADS.DAT','ISO 8859-15','Road')
+        self.segments=self.dat_to_tuple_dict(lcldir,'SEGMENTS.DAT','ISO 8859-15','Segment')
+        self.areas=self.dat_to_tuple_dict(lcldir,'ADMINISTRATIVEAREA.DAT','ISO 8859-15','Area')
+        self.init_done=True
+        
+        print("lcl time:\t"+str(time.time()-self.start))
+       # code.interact(local=locals())
      except IOError as e:
        print(e)
        print("location table not found")
    def lcn_allocated(self,LCN):
+        self.thr.join()
        if self.allocated_codes.has_key(LCN):
          return self.allocated_codes[LCN].ALLOCATED==u"1"
        else:
@ -91,20 +111,40 @@ class lcl:
        return self.points[LCD]
    def get_name(self,NID):
        return self.names[NID]["NAME"]
-    def dat_to_tuple_dict(self,filename,encoding,tuple_name):
-        csv_reader = csv.reader(open(filename), delimiter=';', quotechar='"')
-        header=csv_reader.next()
-        ret_dict={}
-        tupleClass=namedtuple(tuple_name," ".join(header))
-        for row in csv_reader:
-            # decode ISO 8859-15 back to Unicode, cell by cell: #TODO read encoding from README.DAT
-            unirow=[unicode(cell, encoding) for cell in row]
-            linetuple=tupleClass(*unirow)# "*" unpacks the list
-            lcn=int(linetuple.LCD)
-            ret_dict[lcn]=linetuple
-        return ret_dict      
-    def dat_to_dict(self,filename,encoding,id_col_name):
-        csv_reader = csv.reader(open(filename), delimiter=';', quotechar='"')
+    def dat_to_tuple_dict(self,lcldir,filename,encoding,tuple_name):
+        cache_version_str="v1.0"
+        md5_hash=hashlib.md5(open(lcldir+filename, 'rb').read()+cache_version_str).hexdigest()
+        if os.path.exists(lcldir+"/cache/"+md5_hash) and os.path.exists(lcldir+"/cache/"+md5_hash+"_header"):#return cached table
+          #code.interact(local=locals())
+          #print("unpickle_start\t"+str(time.time()-self.start))
+          header=pickle.load( open(lcldir+"/cache/"+md5_hash+"_header", "rb" ) )
+          tupleClass=namedtuple(tuple_name," ".join(header))
+          globals()[tuple_name] = tupleClass#so that pickle finds the dynamic class
+          ret_dict=pickle.load( open(lcldir+"/cache/"+md5_hash, "rb" ) )
+          #print("unpickle_end\t"+str(time.time()-self.start))
+          return ret_dict
+        else:#read DAT file and cache result
+          #print("read_start\t"+str(time.time()-self.start))
+          csv_reader = csv.reader(open(lcldir+filename), delimiter=';', quotechar='"')
+          header=csv_reader.next()
+          ret_dict={}
+          tupleClass=namedtuple(tuple_name," ".join(header))
+          for row in csv_reader:
+              # decode ISO 8859-15 back to Unicode, cell by cell: #TODO read encoding from README.DAT
+              unirow=[unicode(cell, encoding) for cell in row]
+              linetuple=tupleClass(*unirow)# "*" unpacks the list
+              lcn=int(linetuple.LCD)
+              ret_dict[lcn]=linetuple
+          #code.interact(local=locals())
+          #print("pickle_start\t"+str(time.time()-self.start))
+          globals()[tuple_name] = tupleClass#so that pickle finds the dynamic class
+          pickle.dump( ret_dict, open( lcldir+"/cache/"+md5_hash, "wb" ) )
+          pickle.dump( header, open( lcldir+"/cache/"+md5_hash+"_header", "wb" ) )
+          #print("pickle_end\t"+str(time.time()-self.start))
+          return ret_dict      
+    def dat_to_dict(self,lcldir,filename,encoding,id_col_name):
+        #print("datbegin\t"+str(time.time()-self.start))
+        csv_reader = csv.reader(open(lcldir+filename), delimiter=';', quotechar='"')
        header=csv_reader.next()
        ret_dict={}
        for row in csv_reader:
@ -113,6 +153,7 @@ class lcl:
            linedict=dict(zip(header,unirow))
            id_num=int(linedict[id_col_name])
            ret_dict[id_num]=linedict
+        #print("datend\t"+str(time.time()-self.start))
        return ret_dict
 class tmc_event:
    def __init__(self,ecn,tableobj):