diff options
Diffstat (limited to 'utils/common/gitscraper.py')
-rwxr-xr-x | utils/common/gitscraper.py | 58 |
1 files changed, 33 insertions, 25 deletions
diff --git a/utils/common/gitscraper.py b/utils/common/gitscraper.py index 86d6a980cd..496d32bce3 100755 --- a/utils/common/gitscraper.py +++ b/utils/common/gitscraper.py | |||
@@ -42,10 +42,11 @@ def get_refs(repo): | |||
42 | @return Dict matching hashes to each ref. | 42 | @return Dict matching hashes to each ref. |
43 | ''' | 43 | ''' |
44 | print("Getting list of refs") | 44 | print("Getting list of refs") |
45 | output = subprocess.Popen(["git", "show-ref", "--abbrev"], | 45 | output = subprocess.Popen( |
46 | stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) | 46 | ["git", "show-ref", "--abbrev"], |
47 | stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) | ||
47 | cmdout = output.communicate() | 48 | cmdout = output.communicate() |
48 | refs = {} | 49 | refs = dict() |
49 | 50 | ||
50 | if len(cmdout[1]) > 0: | 51 | if len(cmdout[1]) > 0: |
51 | print("An error occured!\n") | 52 | print("An error occured!\n") |
@@ -53,7 +54,7 @@ def get_refs(repo): | |||
53 | return refs | 54 | return refs |
54 | 55 | ||
55 | for line in cmdout: | 56 | for line in cmdout: |
56 | regex = re.findall(b'([a-f0-9]+)\s+(\S+)', line) | 57 | regex = re.findall(b'([a-f0-9]+)\\s+(\\S+)', line) |
57 | for r in regex: | 58 | for r in regex: |
58 | # ref is the key, hash its value. | 59 | # ref is the key, hash its value. |
59 | refs[r[1].decode()] = r[0].decode() | 60 | refs[r[1].decode()] = r[0].decode() |
@@ -61,7 +62,7 @@ def get_refs(repo): | |||
61 | return refs | 62 | return refs |
62 | 63 | ||
63 | 64 | ||
64 | def get_lstree(repo, start, filterlist=[]): | 65 | def get_lstree(repo, start, filterlist=None): |
65 | '''Get recursive list of tree objects for a given tree. | 66 | '''Get recursive list of tree objects for a given tree. |
66 | @param repo Path to repository root. | 67 | @param repo Path to repository root. |
67 | @param start Hash identifying the tree. | 68 | @param start Hash identifying the tree. |
@@ -69,10 +70,13 @@ def get_lstree(repo, start, filterlist=[]): | |||
69 | An empty list will retrieve all paths. | 70 | An empty list will retrieve all paths. |
70 | @return Dict mapping filename to blob hash | 71 | @return Dict mapping filename to blob hash |
71 | ''' | 72 | ''' |
72 | output = subprocess.Popen(["git", "ls-tree", "-r", start], | 73 | if filterlist is None: |
73 | stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) | 74 | filterlist = list() |
75 | output = subprocess.Popen( | ||
76 | ["git", "ls-tree", "-r", start], | ||
77 | stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) | ||
74 | cmdout = output.communicate() | 78 | cmdout = output.communicate() |
75 | objects = {} | 79 | objects = dict() |
76 | 80 | ||
77 | if len(cmdout[1]) > 0: | 81 | if len(cmdout[1]) > 0: |
78 | print("An error occured!\n") | 82 | print("An error occured!\n") |
@@ -80,8 +84,8 @@ def get_lstree(repo, start, filterlist=[]): | |||
80 | return objects | 84 | return objects |
81 | 85 | ||
82 | for line in cmdout[0].decode().split('\n'): | 86 | for line in cmdout[0].decode().split('\n'): |
83 | regex = re.findall(b'([0-9]+)\s+([a-z]+)\s+([0-9a-f]+)\s+(.*)', | 87 | regex = re.findall(b'([0-9]+)\\s+([a-z]+)\\s+([0-9a-f]+)\\s+(.*)', |
84 | line.encode()) | 88 | line.encode()) |
85 | for rf in regex: | 89 | for rf in regex: |
86 | # filter | 90 | # filter |
87 | add = False | 91 | add = False |
@@ -107,8 +111,8 @@ def get_file_timestamp(repo, tree, filename): | |||
107 | @return Timestamp as string. | 111 | @return Timestamp as string. |
108 | ''' | 112 | ''' |
109 | output = subprocess.Popen( | 113 | output = subprocess.Popen( |
110 | ["git", "log", "--format=%ai", "-n", "1", tree, filename], | 114 | ["git", "log", "--format=%ai", "-n", "1", tree, filename], |
111 | stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) | 115 | stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) |
112 | cmdout = output.communicate() | 116 | cmdout = output.communicate() |
113 | 117 | ||
114 | return cmdout[0].decode().rstrip() | 118 | return cmdout[0].decode().rstrip() |
@@ -121,8 +125,9 @@ def get_object(repo, blob, destfile): | |||
121 | @param destfile filename for blob output. | 125 | @param destfile filename for blob output. |
122 | @return True if file was successfully written, False on error. | 126 | @return True if file was successfully written, False on error. |
123 | ''' | 127 | ''' |
124 | output = subprocess.Popen(["git", "cat-file", "-p", blob], | 128 | output = subprocess.Popen( |
125 | stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) | 129 | ["git", "cat-file", "-p", blob], |
130 | stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) | ||
126 | cmdout = output.communicate() | 131 | cmdout = output.communicate() |
127 | # make sure output path exists | 132 | # make sure output path exists |
128 | if len(cmdout[1]) > 0: | 133 | if len(cmdout[1]) > 0: |
@@ -143,8 +148,9 @@ def describe_treehash(repo, treehash): | |||
143 | @param treehash Hash identifying the tree / commit to describe. | 148 | @param treehash Hash identifying the tree / commit to describe. |
144 | @return Description string. | 149 | @return Description string. |
145 | ''' | 150 | ''' |
146 | output = subprocess.Popen(["git", "describe", treehash], | 151 | output = subprocess.Popen( |
147 | stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) | 152 | ["git", "describe", treehash], |
153 | stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) | ||
148 | cmdout = output.communicate() | 154 | cmdout = output.communicate() |
149 | if len(cmdout[1]) > 0: | 155 | if len(cmdout[1]) > 0: |
150 | print("An error occured!\n") | 156 | print("An error occured!\n") |
@@ -153,7 +159,7 @@ def describe_treehash(repo, treehash): | |||
153 | return cmdout[0].rstrip() | 159 | return cmdout[0].rstrip() |
154 | 160 | ||
155 | 161 | ||
156 | def scrape_files(repo, treehash, filelist, dest="", timestamp_files=[]): | 162 | def scrape_files(repo, treehash, filelist, dest=None, timestamp_files=None): |
157 | '''Scrape list of files from repository. | 163 | '''Scrape list of files from repository. |
158 | @param repo Path to repository root. | 164 | @param repo Path to repository root. |
159 | @param treehash Hash identifying the tree. | 165 | @param treehash Hash identifying the tree. |
@@ -167,7 +173,9 @@ def scrape_files(repo, treehash, filelist, dest="", timestamp_files=[]): | |||
167 | ''' | 173 | ''' |
168 | print("Scraping files from repository") | 174 | print("Scraping files from repository") |
169 | 175 | ||
170 | if dest == "": | 176 | if timestamp_files is None: |
177 | timestamp_files = list() | ||
178 | if dest is None: | ||
171 | dest = tempfile.mkdtemp() | 179 | dest = tempfile.mkdtemp() |
172 | treeobjects = get_lstree(repo, treehash, filelist) | 180 | treeobjects = get_lstree(repo, treehash, filelist) |
173 | timestamps = {} | 181 | timestamps = {} |
@@ -180,8 +188,8 @@ def scrape_files(repo, treehash, filelist, dest="", timestamp_files=[]): | |||
180 | return [dest, timestamps] | 188 | return [dest, timestamps] |
181 | 189 | ||
182 | 190 | ||
183 | def archive_files(repo, treehash, filelist, basename, tmpfolder="", | 191 | def archive_files(repo, treehash, filelist, basename, tmpfolder=None, |
184 | archive="tbz"): | 192 | archive="tbz"): |
185 | '''Archive list of files into tarball. | 193 | '''Archive list of files into tarball. |
186 | @param repo Path to repository root. | 194 | @param repo Path to repository root. |
187 | @param treehash Hash identifying the tree. | 195 | @param treehash Hash identifying the tree. |
@@ -197,20 +205,20 @@ def archive_files(repo, treehash, filelist, basename, tmpfolder="", | |||
197 | @return Output filename. | 205 | @return Output filename. |
198 | ''' | 206 | ''' |
199 | 207 | ||
200 | if tmpfolder == "": | 208 | if tmpfolder is None: |
201 | temp_remove = True | 209 | temp_remove = True |
202 | tmpfolder = tempfile.mkdtemp() | 210 | tmpfolder = tempfile.mkdtemp() |
203 | else: | 211 | else: |
204 | temp_remove = False | 212 | temp_remove = False |
205 | workfolder = scrape_files(repo, treehash, filelist, | 213 | workfolder = scrape_files( |
206 | os.path.join(tmpfolder, basename))[0] | 214 | repo, treehash, filelist, os.path.join(tmpfolder, basename))[0] |
207 | if basename is "": | 215 | if basename is "": |
208 | return "" | 216 | return "" |
209 | print("Archiving files from repository") | 217 | print("Archiving files from repository") |
210 | if archive == "7z": | 218 | if archive == "7z": |
211 | outfile = basename + ".7z" | 219 | outfile = basename + ".7z" |
212 | output = subprocess.Popen(["7z", "a", | 220 | output = subprocess.Popen( |
213 | os.path.join(os.getcwd(), basename + ".7z"), basename], | 221 | ["7z", "a", os.path.join(os.getcwd(), basename + ".7z"), basename], |
214 | cwd=tmpfolder, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 222 | cwd=tmpfolder, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
215 | output.communicate() | 223 | output.communicate() |
216 | elif archive == "tbz": | 224 | elif archive == "tbz": |