summary | refs | log | tree | commit | diff
path: root/utils/common/gitscraper.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils/common/gitscraper.py')
-rwxr-xr-x utils/common/gitscraper.py 58
1 files changed, 33 insertions, 25 deletions
diff --git a/utils/common/gitscraper.py b/utils/common/gitscraper.py
index 86d6a980cd..496d32bce3 100755
--- a/utils/common/gitscraper.py
+++ b/utils/common/gitscraper.py
@@ -42,10 +42,11 @@ def get_refs(repo):
42 @return Dict matching hashes to each ref. 42 @return Dict matching hashes to each ref.
43 ''' 43 '''
44 print("Getting list of refs") 44 print("Getting list of refs")
45 output = subprocess.Popen(["git", "show-ref", "--abbrev"], 45 output = subprocess.Popen(
46 stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) 46 ["git", "show-ref", "--abbrev"],
47 stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
47 cmdout = output.communicate() 48 cmdout = output.communicate()
48 refs = {} 49 refs = dict()
49 50
50 if len(cmdout[1]) > 0: 51 if len(cmdout[1]) > 0:
51 print("An error occured!\n") 52 print("An error occured!\n")
@@ -53,7 +54,7 @@ def get_refs(repo):
53 return refs 54 return refs
54 55
55 for line in cmdout: 56 for line in cmdout:
56 regex = re.findall(b'([a-f0-9]+)\s+(\S+)', line) 57 regex = re.findall(b'([a-f0-9]+)\\s+(\\S+)', line)
57 for r in regex: 58 for r in regex:
58 # ref is the key, hash its value. 59 # ref is the key, hash its value.
59 refs[r[1].decode()] = r[0].decode() 60 refs[r[1].decode()] = r[0].decode()
@@ -61,7 +62,7 @@ def get_refs(repo):
61 return refs 62 return refs
62 63
63 64
64def get_lstree(repo, start, filterlist=[]): 65def get_lstree(repo, start, filterlist=None):
65 '''Get recursive list of tree objects for a given tree. 66 '''Get recursive list of tree objects for a given tree.
66 @param repo Path to repository root. 67 @param repo Path to repository root.
67 @param start Hash identifying the tree. 68 @param start Hash identifying the tree.
@@ -69,10 +70,13 @@ def get_lstree(repo, start, filterlist=[]):
69 An empty list will retrieve all paths. 70 An empty list will retrieve all paths.
70 @return Dict mapping filename to blob hash 71 @return Dict mapping filename to blob hash
71 ''' 72 '''
72 output = subprocess.Popen(["git", "ls-tree", "-r", start], 73 if filterlist is None:
73 stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) 74 filterlist = list()
75 output = subprocess.Popen(
76 ["git", "ls-tree", "-r", start],
77 stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
74 cmdout = output.communicate() 78 cmdout = output.communicate()
75 objects = {} 79 objects = dict()
76 80
77 if len(cmdout[1]) > 0: 81 if len(cmdout[1]) > 0:
78 print("An error occured!\n") 82 print("An error occured!\n")
@@ -80,8 +84,8 @@ def get_lstree(repo, start, filterlist=[]):
80 return objects 84 return objects
81 85
82 for line in cmdout[0].decode().split('\n'): 86 for line in cmdout[0].decode().split('\n'):
83 regex = re.findall(b'([0-9]+)\s+([a-z]+)\s+([0-9a-f]+)\s+(.*)', 87 regex = re.findall(b'([0-9]+)\\s+([a-z]+)\\s+([0-9a-f]+)\\s+(.*)',
84 line.encode()) 88 line.encode())
85 for rf in regex: 89 for rf in regex:
86 # filter 90 # filter
87 add = False 91 add = False
@@ -107,8 +111,8 @@ def get_file_timestamp(repo, tree, filename):
107 @return Timestamp as string. 111 @return Timestamp as string.
108 ''' 112 '''
109 output = subprocess.Popen( 113 output = subprocess.Popen(
110 ["git", "log", "--format=%ai", "-n", "1", tree, filename], 114 ["git", "log", "--format=%ai", "-n", "1", tree, filename],
111 stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) 115 stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
112 cmdout = output.communicate() 116 cmdout = output.communicate()
113 117
114 return cmdout[0].decode().rstrip() 118 return cmdout[0].decode().rstrip()
@@ -121,8 +125,9 @@ def get_object(repo, blob, destfile):
121 @param destfile filename for blob output. 125 @param destfile filename for blob output.
122 @return True if file was successfully written, False on error. 126 @return True if file was successfully written, False on error.
123 ''' 127 '''
124 output = subprocess.Popen(["git", "cat-file", "-p", blob], 128 output = subprocess.Popen(
125 stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) 129 ["git", "cat-file", "-p", blob],
130 stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
126 cmdout = output.communicate() 131 cmdout = output.communicate()
127 # make sure output path exists 132 # make sure output path exists
128 if len(cmdout[1]) > 0: 133 if len(cmdout[1]) > 0:
@@ -143,8 +148,9 @@ def describe_treehash(repo, treehash):
143 @param treehash Hash identifying the tree / commit to describe. 148 @param treehash Hash identifying the tree / commit to describe.
144 @return Description string. 149 @return Description string.
145 ''' 150 '''
146 output = subprocess.Popen(["git", "describe", treehash], 151 output = subprocess.Popen(
147 stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) 152 ["git", "describe", treehash],
153 stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
148 cmdout = output.communicate() 154 cmdout = output.communicate()
149 if len(cmdout[1]) > 0: 155 if len(cmdout[1]) > 0:
150 print("An error occured!\n") 156 print("An error occured!\n")
@@ -153,7 +159,7 @@ def describe_treehash(repo, treehash):
153 return cmdout[0].rstrip() 159 return cmdout[0].rstrip()
154 160
155 161
156def scrape_files(repo, treehash, filelist, dest="", timestamp_files=[]): 162def scrape_files(repo, treehash, filelist, dest=None, timestamp_files=None):
157 '''Scrape list of files from repository. 163 '''Scrape list of files from repository.
158 @param repo Path to repository root. 164 @param repo Path to repository root.
159 @param treehash Hash identifying the tree. 165 @param treehash Hash identifying the tree.
@@ -167,7 +173,9 @@ def scrape_files(repo, treehash, filelist, dest="", timestamp_files=[]):
167 ''' 173 '''
168 print("Scraping files from repository") 174 print("Scraping files from repository")
169 175
170 if dest == "": 176 if timestamp_files is None:
177 timestamp_files = list()
178 if dest is None:
171 dest = tempfile.mkdtemp() 179 dest = tempfile.mkdtemp()
172 treeobjects = get_lstree(repo, treehash, filelist) 180 treeobjects = get_lstree(repo, treehash, filelist)
173 timestamps = {} 181 timestamps = {}
@@ -180,8 +188,8 @@ def scrape_files(repo, treehash, filelist, dest="", timestamp_files=[]):
180 return [dest, timestamps] 188 return [dest, timestamps]
181 189
182 190
183def archive_files(repo, treehash, filelist, basename, tmpfolder="", 191def archive_files(repo, treehash, filelist, basename, tmpfolder=None,
184 archive="tbz"): 192 archive="tbz"):
185 '''Archive list of files into tarball. 193 '''Archive list of files into tarball.
186 @param repo Path to repository root. 194 @param repo Path to repository root.
187 @param treehash Hash identifying the tree. 195 @param treehash Hash identifying the tree.
@@ -197,20 +205,20 @@ def archive_files(repo, treehash, filelist, basename, tmpfolder="",
197 @return Output filename. 205 @return Output filename.
198 ''' 206 '''
199 207
200 if tmpfolder == "": 208 if tmpfolder is None:
201 temp_remove = True 209 temp_remove = True
202 tmpfolder = tempfile.mkdtemp() 210 tmpfolder = tempfile.mkdtemp()
203 else: 211 else:
204 temp_remove = False 212 temp_remove = False
205 workfolder = scrape_files(repo, treehash, filelist, 213 workfolder = scrape_files(
206 os.path.join(tmpfolder, basename))[0] 214 repo, treehash, filelist, os.path.join(tmpfolder, basename))[0]
207 if basename is "": 215 if basename is "":
208 return "" 216 return ""
209 print("Archiving files from repository") 217 print("Archiving files from repository")
210 if archive == "7z": 218 if archive == "7z":
211 outfile = basename + ".7z" 219 outfile = basename + ".7z"
212 output = subprocess.Popen(["7z", "a", 220 output = subprocess.Popen(
213 os.path.join(os.getcwd(), basename + ".7z"), basename], 221 ["7z", "a", os.path.join(os.getcwd(), basename + ".7z"), basename],
214 cwd=tmpfolder, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 222 cwd=tmpfolder, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
215 output.communicate() 223 output.communicate()
216 elif archive == "tbz": 224 elif archive == "tbz":