Ganga.Lib.LCG.Utility

1 #!/usr/bin/env python 2 import re 3 import time 4 import random 5 import gzip 6 from Ganga.Utility.logging import getLogger 7 from Ganga.Lib.LCG.ElapsedTimeProfiler import ElapsedTimeProfiler 8 from Ganga.Lib.LCG.Compatibility import * 9

def get_uuid(*args):
    ''' Generates a universally unique ID.

    The ID is the hexadecimal digest obtained by hashing a string made of:
    the current time in milliseconds, a random integer, this host's network
    address (or a random number when the address cannot be resolved) and
    ``str(args)``.

    @param args: optional extra values mixed into the hashed string
    @return: the value of ``hexdigest()`` of the object returned by get_md5_obj()
    '''
    # Imported locally so the address lookup does not depend on 'socket'
    # leaking in through a star import; previously a missing name would have
    # been hidden by the bare 'except:' below.
    import socket

    # int() yields the same decimal string as the former long(...) calls.
    t = int(time.time() * 1000)
    r = int(random.random() * 100000000000000000)
    try:
        a = socket.gethostbyname(socket.gethostname())
    except Exception:
        # Best effort: if we can't get a network address, just imagine one.
        # 'Exception' (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        a = random.random() * 100000000000000000
    data = str(t) + ' ' + str(r) + ' ' + str(a) + ' ' + str(args)

    md5_obj = get_md5_obj()
    md5_obj.update(data)
    return md5_obj.hexdigest()

26

def urisplit(uri):
    """
    Basic URI Parser according to STD66 aka RFC3986

    Returns the tuple (scheme, authority, path, query, fragment).
    A component that is absent from the URI is returned as None, except
    the path, which is returned as an empty string.  The path keeps its
    leading slash.

    >>> urisplit("scheme://authority/path?query#fragment")
    ('scheme', 'authority', '/path', 'query', 'fragment')

    """
    # regex straight from STD 66 section B; raw string so that '\?' reaches
    # the regex engine as written.  Every part of the pattern is optional,
    # so the match always succeeds and .groups() is safe to call.
    regex = r'^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
    p = re.match(regex, uri).groups()
    scheme, authority, path, query, fragment = p[1], p[3], p[4], p[6], p[8]
    return (scheme, authority, path, query, fragment)

41

def readStrippedLines(fileName):
    '''reads in a list of strings from a file

    Every line of the file is returned with leading and trailing
    whitespace (including the newline) removed; blank lines are kept as
    empty strings.

    @param fileName: path of the text file to read
    @return: list of stripped lines, in file order
    '''

    f = open(fileName, 'r')
    try:
        # try/finally guarantees the handle is released even if reading fails
        return [l.strip() for l in f.readlines()]
    finally:
        f.close()

51

def filter_string_list(allList, filterList, type=0):
    '''picks a list of strings from allList (mis-)matching the elements in the filterList

     - type = 0 : including items selected by filterList
     - type = 1 : excluding items selected by filterList

    A filter containing '*' is a wildcard: each '*' is replaced by '.*' and
    the result is applied with re.match, so the rest of the filter is
    interpreted as a regular expression and only has to match from the
    start of the item.  A filter without '*' selects the identical string.

    Duplicates in allList are collapsed; the order of the returned list is
    not defined.  Any other value of type yields an empty list.

    @param allList: candidate strings
    @param filterList: exact strings and/or '*' wildcard patterns
    @param type: 0 to keep the selected items, 1 to drop them
    @return: list of the resulting strings
    '''

    allDict = {}
    for item in allList:
        allDict[item] = True

    if type == 1:
        # work on a copy so that excluding items never alters the lookup table
        matchedDict = dict(allDict)
    else:
        matchedDict = {}

    for pattern in filterList:
        if '*' in pattern:
            # also covers a wildcard in first position, e.g. '*.cern.ch'
            wc = re.compile(".*".join(pattern.split('*')))
            selected = [item for item in allDict if wc.match(item) is not None]
        elif pattern in allDict:
            selected = [pattern]
        else:
            selected = []

        for item in selected:
            if type == 0:
                matchedDict[item] = True
            elif type == 1:
                # the item may already have been dropped by an earlier filter
                matchedDict.pop(item, None)

    return list(matchedDict)

80

def get_md5sum(fname, ignoreGzipTimestamp=False):
    ''' Calculates the MD5 checksum of a file

    @param fname: path of the file to digest
    @param ignoreGzipTimestamp: when true and the file name contains '.tgz'
           or '.gz', the file is read through gzip so that the checksum is
           taken over the uncompressed content
    @return: hexadecimal digest string
    '''

    profiler = ElapsedTimeProfiler(getLogger(name='Profile.LCG'))
    profiler.start()

    # create the digest object first so that a failure here cannot leave
    # an opened file behind
    m = get_md5_obj()

    ## if the file is a zipped format (determined by its name),
    ## try to get the checksum from its content. The reason is that
    ## a gzip file contains a timestamp in the header, which causes
    ## a different md5sum value even if the contents are the same.
    ## NOTE(review): the test matches '.tgz'/'.gz' anywhere after the first
    ## character of the name, not only as the extension - confirm intended.
    if ignoreGzipTimestamp and (fname.find('.tgz') > 0 or fname.find('.gz') > 0):
        f = gzip.open(fname, 'rb')
    else:
        f = open(fname, 'rb')

    try:
        # read in chunks to keep memory usage flat for large files
        while True:
            d = f.read(8096)
            if not d:
                break
            m.update(d)
    finally:
        # always release the handle, even if reading or hashing fails
        f.close()

    md5sum = m.hexdigest()

    profiler.check('md5sum calculation time')

    return md5sum

115

Source Code for Module Ganga.Lib.LCG.Utility