Coverage for src/debputy/plugin/debputy/package_processors.py: 54%
168 statements
coverage.py v7.2.7, created at 2024-04-07 12:14 +0200

import contextlib
import functools
import gzip
import os
import re
import subprocess
from contextlib import ExitStack
from typing import Optional, Iterator, IO, Any, List, Dict, Callable, Union

from debputy.plugin.api import VirtualPath
from debputy.util import _error, xargs, escape_shell, _info, assume_not_none


@contextlib.contextmanager
def _open_maybe_gzip(path: VirtualPath) -> Iterator[Union[IO[bytes], gzip.GzipFile]]:
    if path.name.endswith(".gz"):
        with gzip.GzipFile(path.fs_path, "rb") as fd:
            yield fd
    else:
        with path.open(byte_io=True) as fd:
            yield fd
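

# A minimal sketch of the same transparent-decompression idiom for plain
# filesystem paths (illustration only, not part of the original module; the
# helper above works on debputy VirtualPath objects instead):
@contextlib.contextmanager
def _open_maybe_gzip_fs_example(fs_path: str) -> Iterator[Union[IO[bytes], gzip.GzipFile]]:
    # gzip.open and the builtin open share enough of an interface that callers
    # can iterate lines without caring which one produced the file object.
    opener = gzip.open if fs_path.endswith(".gz") else open
    with opener(fs_path, "rb") as fd:
        yield fd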


_SO_LINK_RE = re.compile(rb"[.]so\s+(.*)\s*")
_LA_DEP_LIB_RE = re.compile(rb"'.+'")


def _detect_so_link(path: VirtualPath) -> Optional[str]:
    so_link_re = _SO_LINK_RE
    with _open_maybe_gzip(path) as fd:
        for line in fd:
            m = so_link_re.search(line)
            if m:
                return m.group(1).decode("utf-8")
    return None
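

# Illustration of the input _detect_so_link handles: a roff ".so" page is a
# one-line include directive pointing at the real manpage.  (The sample line is
# an assumption about typical input; the regex is the module's own.)
assert _SO_LINK_RE.search(b".so man1/foo.1\n").group(1) == b"man1/foo.1"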


def _replace_with_symlink(path: VirtualPath, so_link_target: str) -> None:
    adjusted_target = so_link_target
    parent_dir = path.parent_dir
    assert parent_dir is not None  # For the type checking
    if parent_dir.name == os.path.dirname(adjusted_target):
        # Avoid man8/../man8/foo links
        adjusted_target = os.path.basename(adjusted_target)
    elif "/" in so_link_target:
        # When the target contains a "/", symlinks and .so directives resolve it
        # relative to different base directories. Prepend an extra "../" so the
        # symlink resolves to the same file the .so directive referred to.
        adjusted_target = "../" + adjusted_target

    path.unlink()
    parent_dir.add_symlink(path.name, adjusted_target)
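

# The target adjustment above in isolation (a sketch; `link_dir_name` stands in
# for parent_dir.name, and this function is illustrative rather than part of
# the original module):
def _adjust_so_target_example(link_dir_name: str, so_link_target: str) -> str:
    if link_dir_name == os.path.dirname(so_link_target):
        # Same section: "man1/foo.1" seen from man1/ becomes just "foo.1".
        return os.path.basename(so_link_target)
    if "/" in so_link_target:
        # Cross-section: ".so" paths are relative to the man root, while
        # symlinks are relative to their own directory, hence the extra "../".
        return "../" + so_link_target
    return so_link_target


assert _adjust_so_target_example("man1", "man1/foo.1") == "foo.1"
assert _adjust_so_target_example("man8", "man1/foo.1") == "../man1/foo.1"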


@functools.lru_cache(1)
def _has_man_recode() -> bool:
    # Ideally, we would just use shutil.which or something like that.
    # Unfortunately, in debhelper, we experienced problems with `which`
    # reporting a man tool as available that could not actually be run
    # on the Salsa CI.
    #
    # Therefore, we adopt the logic of dh_installman and run the tool
    # with --help to confirm it is not broken, because no one could
    # figure out what happened on the Salsa CI and life is too short
    # to keep digging.
    try:
        subprocess.check_call(
            ["man-recode", "--help"],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            restore_signals=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError covers the tool being absent entirely, which
        # check_call reports as an OSError rather than a non-zero exit.
        return False
    return True
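

# The lru_cache(1) above memoizes the result, so the probe subprocess runs at
# most once per process.  A generalized sketch of the same "run --help to prove
# the tool works" pattern (hypothetical helper, not used by the module):
def _tool_is_usable_example(tool: str) -> bool:
    try:
        subprocess.check_call(
            [tool, "--help"],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing or non-executable tool.
        return False
    return True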


def process_manpages(fs_root: VirtualPath, _unused1: Any, _unused2: Any) -> None:
    man_dir = fs_root.lookup("./usr/share/man")
    if not man_dir:
        return

    re_encode = []
    for path in (p for p in man_dir.all_paths() if p.is_file and p.has_fs_path):
        size = path.size
        if size == 0:
            continue
        so_link_target = None
        if size <= 1024:
            # debhelper has a 1024 byte guard on the basis that ".so files tend
            # to be small". That guard worked well for debhelper, so let's keep
            # it for now on that basis alone.
            so_link_target = _detect_so_link(path)
        if so_link_target:
            _replace_with_symlink(path, so_link_target)
        else:
            re_encode.append(path)

    if not re_encode or not _has_man_recode():
        return

    with ExitStack() as manager:
        manpages = [
            manager.enter_context(p.replace_fs_path_content()) for p in re_encode
        ]
        static_cmd = ["man-recode", "--to-code", "UTF-8", "--suffix", ".encoded"]
        for cmd in xargs(static_cmd, manpages):
            _info(f"Ensuring manpages have utf-8 encoding via: {escape_shell(*cmd)}")
            try:
                subprocess.check_call(
                    cmd,
                    stdin=subprocess.DEVNULL,
                    restore_signals=True,
                )
            except subprocess.CalledProcessError:
                _error(
                    "The man-recode process failed. Please review the output of `man-recode` to understand"
                    " what went wrong."
                )
        for manpage in manpages:
            # man-recode drops a compression extension before appending its
            # suffix, so "foo.1.gz" is written to "foo.1.encoded"; rename that
            # back over the original path.
            dest_name = manpage
            if dest_name.endswith(".gz"):
                dest_name = dest_name[:-3]
            os.rename(f"{dest_name}.encoded", manpage)
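

# A minimal sketch of the batching idea behind debputy's xargs() used above
# (illustrative only, with a fixed batch size; the real helper is expected to
# respect the operating system's command-line length limit instead):
def _xargs_example(
    static_cmd: List[str], args: List[str], batch_size: int = 20
) -> Iterator[List[str]]:
    for i in range(0, len(args), batch_size):
        yield static_cmd + args[i : i + batch_size]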


def _filter_compress_paths() -> Callable[[VirtualPath], Iterator[VirtualPath]]:
    ignore_dir_basenames = {
        "_sources",
    }
    ignore_basenames = {
        ".htaccess",
        "index.sgml",
        "objects.inv",
        "search_index.json",
        "copyright",
    }
    ignore_extensions = {
        ".htm",
        ".html",
        ".xhtml",
        ".gif",
        ".png",
        ".jpg",
        ".jpeg",
        ".gz",
        ".taz",
        ".tgz",
        ".z",
        ".bz2",
        ".epub",
        ".jar",
        ".zip",
        ".odg",
        ".odp",
        ".odt",
        ".css",
        ".xz",
        ".lz",
        ".lzma",
        ".haddock",
        ".hs",
        ".woff",
        ".woff2",
        ".svg",
        ".svgz",
        ".js",
        ".devhelp2",
        ".map",  # Technically, dh_compress has this one case-sensitive
    }
    ignore_special_cases = ("-gz", "-z", "_z")

    def _filtered_walk(path: VirtualPath) -> Iterator[VirtualPath]:
        for path, children in path.walk():
            if path.name in ignore_dir_basenames:  # coverage: branch never taken
                children.clear()
                continue
            if path.is_dir and path.name == "examples":  # coverage: branch never taken
                # Ignore anything beneath /usr/share/doc/*/examples
                parent = path.parent_dir
                grand_parent = parent.parent_dir if parent else None
                if grand_parent and grand_parent.absolute == "/usr/share/doc":
                    children.clear()
                    continue
            name = path.name
            if (
                path.is_symlink
                or not path.is_file
                or name in ignore_basenames
                or not path.has_fs_path
            ):
                continue

            name_lc = name.lower()
            _, ext = os.path.splitext(name_lc)

            if ext in ignore_extensions or name_lc.endswith(ignore_special_cases):  # coverage: branch never taken
                continue
            yield path

    return _filtered_walk
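

# The children.clear() calls above prune whole subtrees from the walk.  The
# standard-library equivalent of the same idiom, for comparison (a sketch, not
# used by the module):
def _walk_skipping_sources_example(root: str) -> Iterator[str]:
    for dirpath, dirnames, filenames in os.walk(root):
        if "_sources" in dirnames:
            # Removing an entry from dirnames stops os.walk from descending
            # into that directory, just like children.clear() above.
            dirnames.remove("_sources")
        yield from (os.path.join(dirpath, f) for f in filenames)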


def _find_compressable_paths(fs_root: VirtualPath) -> Iterator[VirtualPath]:
    path_filter = _filter_compress_paths()

    for p, compress_size_threshold in (
        ("./usr/share/info", 0),
        ("./usr/share/man", 0),
        ("./usr/share/doc", 4096),
    ):
        path = fs_root.lookup(p)
        if path is None:
            continue
        paths = path_filter(path)
        if compress_size_threshold:  # coverage: branch never taken
            # The special-case for changelog and NEWS is from dh_compress. Generally these files
            # have always been compressed regardless of their size.
            paths = (
                p
                for p in paths
                if p.size > compress_size_threshold
                or p.name.startswith(("changelog", "NEWS"))
            )
        yield from paths
    x11_path = fs_root.lookup("./usr/share/fonts/X11")
    if x11_path:  # coverage: branch never taken
        yield from (
            p for p in x11_path.all_paths() if p.is_file and p.name.endswith(".pcf")
        )
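

# The /usr/share/doc threshold above, stated as a plain predicate (a sketch,
# not used by the module): doc files are only compressed past 4096 bytes,
# except changelog and NEWS files, which are always compressed.
def _doc_wants_compression_example(name: str, size: int) -> bool:
    return size > 4096 or name.startswith(("changelog", "NEWS"))


assert _doc_wants_compression_example("README.Debian", 100) is False
assert _doc_wants_compression_example("changelog.Debian", 100) is True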


def apply_compression(fs_root: VirtualPath, _unused1: Any, _unused2: Any) -> None:
    # TODO: Support hardlinks
    compressed_files: Dict[str, str] = {}
    for path in _find_compressable_paths(fs_root):
        parent_dir = assume_not_none(path.parent_dir)
        with parent_dir.add_file(f"{path.name}.gz", mtime=path.mtime) as new_file, open(
            new_file.fs_path, "wb"
        ) as fd:
            try:
                subprocess.check_call(["gzip", "-9nc", path.fs_path], stdout=fd)
            except subprocess.CalledProcessError:
                full_command = f"gzip -9nc {escape_shell(path.fs_path)} > {escape_shell(new_file.fs_path)}"
                _error(
                    f"The compression of {path.path} failed. Please review the error message from gzip to"
                    f" understand what went wrong. Full command was: {full_command}"
                )
        compressed_files[path.path] = new_file.path
        del parent_dir[path.name]

    all_remaining_symlinks = {p.path: p for p in fs_root.all_paths() if p.is_symlink}
    changed = True
    while changed:
        changed = False
        remaining: List[VirtualPath] = list(all_remaining_symlinks.values())
        for symlink in remaining:
            target = symlink.readlink()
            dir_target, basename_target = os.path.split(target)
            new_basename_target = f"{basename_target}.gz"
            symlink_parent_dir = assume_not_none(symlink.parent_dir)
            dir_path = symlink_parent_dir
            if dir_target != "":
                dir_path = dir_path.lookup(dir_target)
            if (  # coverage: branch never taken
                not dir_path
                or basename_target in dir_path
                or new_basename_target not in dir_path
            ):
                continue
            del all_remaining_symlinks[symlink.path]
            changed = True

            new_link_name = (
                f"{symlink.name}.gz"
                if not symlink.name.endswith(".gz")
                else symlink.name
            )
            symlink_parent_dir.add_symlink(
                new_link_name, os.path.join(dir_target, new_basename_target)
            )
            symlink.unlink()
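

# The while-changed loop above is a fixed-point iteration: rewriting one
# symlink can make another symlink (one that pointed at the first) rewritable
# on a later pass, so passes repeat until nothing changes.  A toy version of
# the same convergence pattern over plain dict/set data (illustrative only):
def _gz_link_fixpoint_example(
    entries: set, links: Dict[str, str]
) -> Dict[str, str]:
    # entries: the names that currently exist; links: link name -> target name.
    links = dict(links)
    changed = True
    while changed:
        changed = False
        for name, target in list(links.items()):
            if target not in entries and f"{target}.gz" in entries:
                del links[name]
                entries.discard(name)
                new_name = name if name.endswith(".gz") else f"{name}.gz"
                links[new_name] = f"{target}.gz"
                entries.add(new_name)
                changed = True
    return links


# "b" only becomes rewritable on the second pass, after "a" has become "a.gz".
assert _gz_link_fixpoint_example(
    {"doc.txt.gz", "a", "b"}, {"b": "a", "a": "doc.txt"}
) == {"a.gz": "doc.txt.gz", "b.gz": "a.gz"}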


def _la_files(fs_root: VirtualPath) -> Iterator[VirtualPath]:
    lib_dir = fs_root.lookup("/usr/lib")
    if not lib_dir:
        return
    # The original code only iterates directly over /usr/lib. To be a faithful
    # conversion, we do the same here.
    # Eagerly resolve the list, as the replacement can otherwise trigger a runtime error.
    paths = list(lib_dir.iterdir)
    yield from (p for p in paths if p.is_file and p.name.endswith(".la"))


# Conceptually, the same feature that dh_gnome provides.
# The clean_la_files function is based on the dh_gnome version written by Luca
# Falavigna in 2010, who in turn references a Makefile version of the feature.
# https://salsa.debian.org/gnome-team/gnome-pkg-tools/-/commit/2868e1e41ea45443b0fb340bf4c71c4de87d4a5b
def clean_la_files(
    fs_root: VirtualPath,
    _unused1: Any,
    _unused2: Any,
) -> None:
    for path in _la_files(fs_root):
        buffer = []
        with path.open(byte_io=True) as fd:
            replace_file = False
            for line in fd:
                if line.startswith(b"dependency_libs"):
                    replacement = _LA_DEP_LIB_RE.sub(b"''", line)
                    if replacement != line:
                        replace_file = True
                        line = replacement
                buffer.append(line)

        if not replace_file:
            continue
        _info(f"Clearing the dependency_libs line in {path.path}")
        with path.replace_fs_path_content() as fs_path, open(fs_path, "wb") as wfd:
            wfd.writelines(buffer)
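

# Example of the rewrite clean_la_files performs (the sample line is an
# assumption about typical libtool .la output; the regex is the module's own):
assert _LA_DEP_LIB_RE.sub(
    b"''", b"dependency_libs='-L/usr/lib -lfoo -lbar'\n"
) == b"dependency_libs=''\n"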