diff options
-rwxr-xr-x | utils/common/gitscraper.py | 37 |
1 files changed, 30 insertions, 7 deletions
diff --git a/utils/common/gitscraper.py b/utils/common/gitscraper.py index 85a8467617..2ced9ec07e 100755 --- a/utils/common/gitscraper.py +++ b/utils/common/gitscraper.py | |||
@@ -42,8 +42,8 @@ def get_refs(repo): | |||
42 | @return Dict matching hashes to each ref. | 42 | @return Dict matching hashes to each ref. |
43 | ''' | 43 | ''' |
44 | print("Getting list of refs") | 44 | print("Getting list of refs") |
45 | output = subprocess.Popen(["git", "show-ref"], stdout=subprocess.PIPE, | 45 | output = subprocess.Popen(["git", "show-ref", "--abbrev"], |
46 | stderr=subprocess.PIPE, cwd=repo) | 46 | stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) |
47 | cmdout = output.communicate() | 47 | cmdout = output.communicate() |
48 | refs = {} | 48 | refs = {} |
49 | 49 | ||
@@ -99,6 +99,21 @@ def get_lstree(repo, start, filterlist=[]): | |||
99 | return objects | 99 | return objects |
100 | 100 | ||
101 | 101 | ||
102 | def get_file_timestamp(repo, tree, filename): | ||
103 | '''Get timestamp for a file. | ||
104 | @param repo Path to repository root. | ||
105 | @param tree Hash of tree to use. | ||
106 | @param filename Filename in tree | ||
107 | @return Timestamp as string. | ||
108 | ''' | ||
109 | output = subprocess.Popen( | ||
110 | ["git", "log", "--format=%ai", "-n", "1", tree, filename], | ||
111 | stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo) | ||
112 | cmdout = output.communicate() | ||
113 | |||
114 | return cmdout[0].decode().rstrip() | ||
115 | |||
116 | |||
102 | def get_object(repo, blob, destfile): | 117 | def get_object(repo, blob, destfile): |
103 | '''Get an identified object from the repository. | 118 | '''Get an identified object from the repository. |
104 | @param repo Path to repository root. | 119 | @param repo Path to repository root. |
@@ -138,7 +153,7 @@ def describe_treehash(repo, treehash): | |||
138 | return cmdout[0].rstrip() | 153 | return cmdout[0].rstrip() |
139 | 154 | ||
140 | 155 | ||
141 | def scrape_files(repo, treehash, filelist, dest=""): | 156 | def scrape_files(repo, treehash, filelist, dest="", timestamp_files=[]): |
142 | '''Scrape list of files from repository. | 157 | '''Scrape list of files from repository. |
143 | @param repo Path to repository root. | 158 | @param repo Path to repository root. |
144 | @param treehash Hash identifying the tree. | 159 | @param treehash Hash identifying the tree. |
@@ -146,17 +161,23 @@ def scrape_files(repo, treehash, filelist, dest=""): | |||
146 | @param dest Destination path for files. Files will get retrieved with full | 161 | @param dest Destination path for files. Files will get retrieved with full |
147 | path from the repository, and the folder structure will get | 162 | path from the repository, and the folder structure will get |
148 | created below dest as necessary. | 163 | created below dest as necessary. |
149 | @return Destination path. | 164 | @param timestamp_files List of files to also get the last modified date. |
165 | WARNING: this is SLOW! | ||
166 | @return Destination path, filename:timestamp dict. | ||
150 | ''' | 167 | ''' |
151 | print("Scraping files from repository") | 168 | print("Scraping files from repository") |
152 | 169 | ||
153 | if dest == "": | 170 | if dest == "": |
154 | dest = tempfile.mkdtemp() | 171 | dest = tempfile.mkdtemp() |
155 | treeobjects = get_lstree(repo, treehash, filelist) | 172 | treeobjects = get_lstree(repo, treehash, filelist) |
173 | timestamps = {} | ||
156 | for obj in treeobjects: | 174 | for obj in treeobjects: |
157 | get_object(repo, treeobjects[obj], os.path.join(dest.encode(), obj)) | 175 | get_object(repo, treeobjects[obj], os.path.join(dest.encode(), obj)) |
176 | for f in timestamp_files: | ||
177 | if obj.find(f) == 0: | ||
178 | timestamps[obj] = get_file_timestamp(repo, treehash, obj) | ||
158 | 179 | ||
159 | return dest | 180 | return [dest, timestamps] |
160 | 181 | ||
161 | 182 | ||
162 | def archive_files(repo, treehash, filelist, basename, tmpfolder="", | 183 | def archive_files(repo, treehash, filelist, basename, tmpfolder="", |
@@ -182,7 +203,7 @@ def archive_files(repo, treehash, filelist, basename, tmpfolder="", | |||
182 | else: | 203 | else: |
183 | temp_remove = False | 204 | temp_remove = False |
184 | workfolder = scrape_files(repo, treehash, filelist, | 205 | workfolder = scrape_files(repo, treehash, filelist, |
185 | os.path.join(tmpfolder, basename)) | 206 | os.path.join(tmpfolder, basename))[0] |
186 | if basename is "": | 207 | if basename is "": |
187 | return "" | 208 | return "" |
188 | print("Archiving files from repository") | 209 | print("Archiving files from repository") |
@@ -192,11 +213,13 @@ def archive_files(repo, treehash, filelist, basename, tmpfolder="", | |||
192 | os.path.join(os.getcwd(), basename + ".7z"), basename], | 213 | os.path.join(os.getcwd(), basename + ".7z"), basename], |
193 | cwd=tmpfolder, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 214 | cwd=tmpfolder, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
194 | output.communicate() | 215 | output.communicate() |
195 | else: | 216 | elif archive == "tbz": |
196 | outfile = basename + ".tar.bz2" | 217 | outfile = basename + ".tar.bz2" |
197 | tf = tarfile.open(outfile, "w:bz2") | 218 | tf = tarfile.open(outfile, "w:bz2") |
198 | tf.add(workfolder, basename) | 219 | tf.add(workfolder, basename) |
199 | tf.close() | 220 | tf.close() |
221 | else: | ||
222 | print("Files not archived") | ||
200 | if tmpfolder != workfolder: | 223 | if tmpfolder != workfolder: |
201 | shutil.rmtree(workfolder) | 224 | shutil.rmtree(workfolder) |
202 | if temp_remove: | 225 | if temp_remove: |