2999
|
1 /* db.c: an external database to avoid filesystem lookups. |
|
2 |
|
3 Copyright (C) 1994, 95, 96, 97 Karl Berry. |
|
4 |
|
5 This library is free software; you can redistribute it and/or |
|
6 modify it under the terms of the GNU Library General Public |
|
7 License as published by the Free Software Foundation; either |
|
8 version 2 of the License, or (at your option) any later version. |
|
9 |
|
10 This library is distributed in the hope that it will be useful, |
|
11 but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 Library General Public License for more details. |
|
14 |
|
15 You should have received a copy of the GNU Library General Public |
|
16 License along with this library; if not, write to the Free Software |
|
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ |
|
18 |
4291
|
19 /* This gives us a better chance of finding a prototype for basename |
|
20 on some systems. */ |
|
21 |
|
22 #if ! defined (_GNU_SOURCE) |
|
23 #define _GNU_SOURCE |
|
24 #endif |
|
25 |
2999
|
26 #include <kpathsea/config.h> |
|
27 #include <kpathsea/absolute.h> |
|
28 #include <kpathsea/c-fopen.h> |
|
29 #include <kpathsea/c-pathch.h> |
|
30 #include <kpathsea/db.h> |
|
31 #include <kpathsea/hash.h> |
|
32 #include <kpathsea/line.h> |
|
33 #include <kpathsea/pathsearch.h> |
|
34 #include <kpathsea/readable.h> |
|
35 #include <kpathsea/str-list.h> |
|
36 #include <kpathsea/tex-file.h> |
|
37 #include <kpathsea/variable.h> |
|
38 |
|
static hash_table_type db; /* The hash table for all the ls-R's.  */
/* SMALL: The old size of the hash table was 7603, with the assumption
   that a minimal ls-R has about 3500 entries.  But a typical ls-R will
   be more like double that size.  */
#ifndef DB_HASH_SIZE
#define DB_HASH_SIZE 15991
#endif
/* Base name of the database files searched for along the db path.  */
#ifndef DB_NAME
#define DB_NAME "ls-R"
#endif

/* The hash table for all the alias files; it is filled by alias_build,
   keyed by alias name with the real name as the value.  */
static hash_table_type alias_db;
/* Base name of the alias files searched for along the db path.  */
#ifndef ALIAS_NAME
#define ALIAS_NAME "aliases"
#endif
/* Size passed to hash_create for alias_db.  */
#ifndef ALIAS_HASH_SIZE
#define ALIAS_HASH_SIZE 1009
#endif

/* The directory (with trailing separator) of every ls-R file that
   db_build found usable; kpse_db_search checks path elements against
   this list before consulting the hash tables.  */
static str_list_type db_dir_list;
|
59 |
|
60 /* If DIRNAME contains any element beginning with a `.' (that is more |
|
61 than just `./'), return true. This is to allow ``hidden'' |
|
62 directories -- ones that don't get searched. */ |
|
63 |
|
64 static boolean |
|
65 ignore_dir_p P1C(const_string, dirname) |
|
66 { |
|
67 const_string dot_pos = dirname; |
|
68 |
|
69 while ((dot_pos = strchr (dot_pos + 1, '.'))) { |
|
70 /* If / before and no / after, skip it. */ |
|
71 if (IS_DIR_SEP (dot_pos[-1]) && dot_pos[1] && !IS_DIR_SEP (dot_pos[1])) |
|
72 return true; |
|
73 } |
|
74 |
|
75 return false; |
|
76 } |
|
77 |
|
78 /* If no DB_FILENAME, return false (maybe they aren't using this feature). |
|
79 Otherwise, add entries from DB_FILENAME to TABLE, and return true. */ |
|
80 |
|
81 static boolean |
|
82 db_build P2C(hash_table_type *, table, const_string, db_filename) |
|
83 { |
|
84 string line; |
|
85 unsigned dir_count = 0, file_count = 0, ignore_dir_count = 0; |
|
86 unsigned len = strlen (db_filename) - sizeof (DB_NAME) + 1; /* Keep the /. */ |
|
87 string top_dir = xmalloc (len + 1); |
|
88 string cur_dir = NULL; /* First thing in ls-R might be a filename. */ |
|
89 FILE *db_file = fopen (db_filename, FOPEN_R_MODE); |
|
90 |
|
91 strncpy (top_dir, db_filename, len); |
|
92 top_dir[len] = 0; |
|
93 |
|
94 if (db_file) { |
|
95 while ((line = read_line (db_file)) != NULL) { |
|
96 len = strlen (line); |
|
97 |
|
98 /* A line like `/foo:' = new dir foo. Allow both absolute (/...) |
|
99 and explicitly relative (./...) names here. It's a kludge to |
|
100 pass in the directory name with the trailing : still attached, |
|
101 but it doesn't actually hurt. */ |
|
102 if (len > 0 && line[len - 1] == ':' && kpse_absolute_p (line, true)) { |
|
103 /* New directory line. */ |
|
104 if (!ignore_dir_p (line)) { |
|
105 /* If they gave a relative name, prepend full directory name now. */ |
|
106 line[len - 1] = DIR_SEP; |
|
107 /* Skip over leading `./', it confuses `match' and is just a |
|
108 waste of space, anyway. This will lose on `../', but `match' |
|
109 won't work there, either, so it doesn't matter. */ |
|
110 cur_dir = *line == '.' ? concat (top_dir, line + 2) : xstrdup (line); |
|
111 dir_count++; |
|
112 } else { |
|
113 cur_dir = NULL; |
|
114 ignore_dir_count++; |
|
115 } |
|
116 |
|
117 /* Ignore blank, `.' and `..' lines. */ |
|
118 } else if (*line != 0 && cur_dir /* a file line? */ |
|
119 && !(*line == '.' |
|
120 && (line[1] == '0' || (line[1] == '.' && line[2] == 0)))) |
|
121 {/* Make a new hash table entry with a key of `line' and a data |
|
122 of `cur_dir'. An already-existing identical key is ok, since |
|
123 a file named `foo' can be in more than one directory. Share |
|
124 `cur_dir' among all its files (and hence never free it). */ |
|
125 hash_insert (table, xstrdup (line), cur_dir); |
|
126 file_count++; |
|
127 |
|
128 } /* else ignore blank lines or top-level files |
|
129 or files in ignored directories*/ |
|
130 |
|
131 free (line); |
|
132 } |
|
133 |
|
134 xfclose (db_file, db_filename); |
|
135 |
|
136 if (file_count == 0) { |
|
137 WARNING1 ("kpathsea: No usable entries in %s", db_filename); |
|
138 WARNING ("kpathsea: See the manual for how to generate ls-R"); |
|
139 db_file = NULL; |
|
140 } else { |
|
141 str_list_add (&db_dir_list, xstrdup (top_dir)); |
|
142 } |
|
143 |
|
144 #ifdef KPSE_DEBUG |
|
145 if (KPSE_DEBUG_P (KPSE_DEBUG_HASH)) { |
|
146 /* Don't make this a debugging bit, since the output is so |
|
147 voluminous, and being able to specify -1 is too useful. |
|
148 Instead, let people who want it run the program under |
|
149 a debugger and change the variable that way. */ |
|
150 boolean hash_summary_only = true; |
|
151 |
|
152 DEBUGF4 ("%s: %u entries in %d directories (%d hidden).\n", |
|
153 db_filename, file_count, dir_count, ignore_dir_count); |
|
154 DEBUGF ("ls-R hash table:"); |
|
155 hash_print (*table, hash_summary_only); |
|
156 fflush (stderr); |
|
157 } |
|
158 #endif /* KPSE_DEBUG */ |
|
159 } |
|
160 |
|
161 free (top_dir); |
|
162 |
|
163 return db_file != NULL; |
|
164 } |
|
165 |
|
166 |
|
167 /* Insert FNAME into the hash table. This is for files that get built |
|
168 during a run. We wouldn't want to reread all of ls-R, even if it got |
|
169 rebuilt. */ |
|
170 |
|
171 void |
|
172 kpse_db_insert P1C(const_string, passed_fname) |
|
173 { |
|
174 /* We might not have found ls-R, or even had occasion to look for it |
|
175 yet, so do nothing if we have no hash table. */ |
|
176 if (db.buckets) { |
|
177 const_string dir_part; |
|
178 string fname = xstrdup (passed_fname); |
3172
|
179 string baseptr = (string)basename (fname); |
2999
|
180 const_string file_part = xstrdup (baseptr); |
|
181 |
|
182 *baseptr = '\0'; /* Chop off the filename. */ |
|
183 dir_part = fname; /* That leaves the dir, with the trailing /. */ |
|
184 |
|
185 hash_insert (&db, file_part, dir_part); |
|
186 } |
|
187 } |
|
188 |
|
189 /* Return true if FILENAME could be in PATH_ELT, i.e., if the directory |
|
190 part of FILENAME matches PATH_ELT. Have to consider // wildcards, but |
|
191 $ and ~ expansion have already been done. */ |
|
192 |
|
193 static boolean |
|
194 match P2C(const_string, filename, const_string, path_elt) |
|
195 { |
|
196 const_string original_filename = filename; |
|
197 boolean matched = false; |
|
198 |
3172
|
199 for (; *filename && *path_elt; filename++, path_elt++) { |
2999
|
200 if (FILECHARCASEEQ (*filename, *path_elt)) /* normal character match */ |
|
201 ; |
|
202 |
|
203 else if (IS_DIR_SEP (*path_elt) /* at // */ |
|
204 && original_filename < filename && IS_DIR_SEP (path_elt[-1])) { |
|
205 while (IS_DIR_SEP (*path_elt)) |
|
206 path_elt++; /* get past second and any subsequent /'s */ |
|
207 if (*path_elt == 0) { |
|
208 /* Trailing //, matches anything. We could make this part of the |
|
209 other case, but it seems pointless to do the extra work. */ |
|
210 matched = true; |
3172
|
211 break; |
2999
|
212 } else { |
|
213 /* Intermediate //, have to match rest of PATH_ELT. */ |
|
214 for (; !matched && *filename; filename++) { |
|
215 /* Try matching at each possible character. */ |
|
216 if (IS_DIR_SEP (filename[-1]) |
|
217 && FILECHARCASEEQ (*filename, *path_elt)) |
|
218 matched = match (filename, path_elt); |
|
219 } |
3172
|
220 /* Prevent filename++ when *filename='\0'. */ |
|
221 break; |
2999
|
222 } |
|
223 } |
|
224 |
|
225 else /* normal character nonmatch, quit */ |
3172
|
226 break; |
2999
|
227 } |
|
228 |
|
229 /* If we've reached the end of PATH_ELT, check that we're at the last |
|
230 component of FILENAME, we've matched. */ |
|
231 if (!matched && *path_elt == 0) { |
|
232 /* Probably PATH_ELT ended with `vf' or some such, and FILENAME ends |
|
233 with `vf/ptmr.vf'. In that case, we'll be at a directory |
|
234 separator. On the other hand, if PATH_ELT ended with a / (as in |
|
235 `vf/'), FILENAME being the same `vf/ptmr.vf', we'll be at the |
|
236 `p'. Upshot: if we're at a dir separator in FILENAME, skip it. |
|
237 But if not, that's ok, as long as there are no more dir separators. */ |
|
238 if (IS_DIR_SEP (*filename)) |
|
239 filename++; |
|
240 |
|
241 while (*filename && !IS_DIR_SEP (*filename)) |
|
242 filename++; |
|
243 matched = *filename == 0; |
|
244 } |
|
245 |
|
246 return matched; |
|
247 } |
|
248 |
|
249 |
|
250 /* If DB_DIR is a prefix of PATH_ELT, return true; otherwise false. |
|
251 That is, the question is whether to try the db for a file looked up |
|
252 in PATH_ELT. If PATH_ELT == ".", for example, the answer is no. If |
|
253 PATH_ELT == "/usr/local/lib/texmf/fonts//tfm", the answer is yes. |
|
254 |
|
255 In practice, ls-R is only needed for lengthy subdirectory |
|
256 comparisons, but there's no gain to checking PATH_ELT to see if it is |
|
257 a subdir match, since the only way to do that is to do a string |
|
258 search in it, which is all we do anyway. */ |
|
259 |
|
260 static boolean |
|
261 elt_in_db P2C(const_string, db_dir, const_string, path_elt) |
|
262 { |
|
263 boolean found = false; |
|
264 |
|
265 while (!found && FILECHARCASEEQ (*db_dir++, *path_elt++)) { |
|
266 /* If we've matched the entire db directory, it's good. */ |
|
267 if (*db_dir == 0) |
|
268 found = true; |
|
269 |
|
270 /* If we've reached the end of PATH_ELT, but not the end of the db |
|
271 directory, it's no good. */ |
|
272 else if (*path_elt == 0) |
|
273 break; |
|
274 } |
|
275 |
|
276 return found; |
|
277 } |
|
278 |
|
279 /* If ALIAS_FILENAME exists, read it into TABLE. */ |
|
280 |
|
281 static boolean |
|
282 alias_build P2C(hash_table_type *, table, const_string, alias_filename) |
|
283 { |
|
284 string line, real, alias; |
|
285 unsigned count = 0; |
|
286 FILE *alias_file = fopen (alias_filename, FOPEN_R_MODE); |
|
287 |
|
288 if (alias_file) { |
|
289 while ((line = read_line (alias_file)) != NULL) { |
|
290 /* comments or empty */ |
|
291 if (*line == 0 || *line == '%' || *line == '#') { |
|
292 ; |
|
293 } else { |
|
294 /* Each line should have two fields: realname aliasname. */ |
|
295 real = line; |
|
296 while (*real && ISSPACE (*real)) |
|
297 real++; |
|
298 alias = real; |
|
299 while (*alias && !ISSPACE (*alias)) |
|
300 alias++; |
|
301 *alias++ = 0; |
|
302 while (*alias && ISSPACE (*alias)) |
|
303 alias++; |
|
304 /* Is the check for errors strong enough? Should we warn the user |
|
305 for potential errors? */ |
|
306 if (strlen (real) != 0 && strlen (alias) != 0) { |
|
307 hash_insert (table, xstrdup (alias), xstrdup (real)); |
|
308 count++; |
|
309 } |
|
310 } |
|
311 free (line); |
|
312 } |
|
313 |
|
314 #ifdef KPSE_DEBUG |
|
315 if (KPSE_DEBUG_P (KPSE_DEBUG_HASH)) { |
|
316 /* As with ls-R above ... */ |
|
317 boolean hash_summary_only = true; |
|
318 DEBUGF2 ("%s: %u aliases.\n", alias_filename, count); |
|
319 DEBUGF ("alias hash table:"); |
|
320 hash_print (*table, hash_summary_only); |
|
321 fflush (stderr); |
|
322 } |
|
323 #endif /* KPSE_DEBUG */ |
|
324 |
|
325 xfclose (alias_file, alias_filename); |
|
326 } |
|
327 |
|
328 return alias_file != NULL; |
|
329 } |
|
330 |
|
331 /* Initialize the path for ls-R files, and read them all into the hash |
|
332 table `db'. If no usable ls-R's are found, set db.buckets to NULL. */ |
|
333 |
|
334 void |
|
335 kpse_init_db P1H(void) |
|
336 { |
|
337 boolean ok = false; |
|
338 const_string db_path = kpse_init_format (kpse_db_format); |
|
339 string *db_files = kpse_all_path_search (db_path, DB_NAME); |
|
340 string *orig_db_files = db_files; |
|
341 |
|
342 /* Must do this after the path searching (which ends up calling |
|
343 kpse_db_search recursively), so db.buckets stays NULL. */ |
|
344 db = hash_create (DB_HASH_SIZE); |
|
345 |
|
346 while (db_files && *db_files) { |
|
347 if (db_build (&db, *db_files)) |
|
348 ok = true; |
|
349 free (*db_files); |
|
350 db_files++; |
|
351 } |
|
352 |
|
353 if (!ok) { |
|
354 /* If db can't be built, leave `size' nonzero (so we don't |
|
355 rebuild it), but clear `buckets' (so we don't look in it). */ |
|
356 free (db.buckets); |
|
357 db.buckets = NULL; |
|
358 } |
|
359 |
|
360 free (orig_db_files); |
|
361 |
|
362 /* Add the content of any alias databases. There may exist more than |
|
363 one alias file along DB_NAME files. This duplicates the above code |
|
364 -- should be a function. */ |
|
365 ok = false; |
|
366 db_files = kpse_all_path_search (db_path, ALIAS_NAME); |
|
367 orig_db_files = db_files; |
|
368 |
|
369 alias_db = hash_create (ALIAS_HASH_SIZE); |
|
370 |
|
371 while (db_files && *db_files) { |
|
372 if (alias_build (&alias_db, *db_files)) |
|
373 ok = true; |
|
374 free (*db_files); |
|
375 db_files++; |
|
376 } |
|
377 |
|
378 if (!ok) { |
|
379 free (alias_db.buckets); |
|
380 alias_db.buckets = NULL; |
|
381 } |
|
382 |
|
383 free (orig_db_files); |
|
384 } |
|
385 |
|
386 /* Avoid doing anything if this PATH_ELT is irrelevant to the databases. */ |
|
387 |
|
388 str_list_type * |
|
389 kpse_db_search P3C(const_string, name, const_string, orig_path_elt, |
|
390 boolean, all) |
|
391 { |
|
392 string *db_dirs, *orig_dirs, *r; |
|
393 const_string last_slash; |
|
394 string path_elt; |
|
395 boolean done; |
|
396 str_list_type *ret; |
|
397 unsigned e; |
|
398 string *aliases = NULL; |
|
399 boolean relevant = false; |
|
400 |
|
401 /* If we failed to build the database (or if this is the recursive |
|
402 call to build the db path), quit. */ |
|
403 if (db.buckets == NULL) |
|
404 return NULL; |
|
405 |
|
406 /* When tex-glyph.c calls us looking for, e.g., dpi600/cmr10.pk, we |
|
407 won't find it unless we change NAME to just `cmr10.pk' and append |
|
408 `/dpi600' to PATH_ELT. We are justified in using a literal `/' |
|
409 here, since that's what tex-glyph.c unconditionally uses in |
|
410 DPI_BITMAP_SPEC. But don't do anything if the / begins NAME; that |
|
411 should never happen. */ |
|
412 last_slash = strrchr (name, '/'); |
|
413 if (last_slash && last_slash != name) { |
|
414 unsigned len = last_slash - name + 1; |
|
415 string dir_part = xmalloc (len); |
|
416 strncpy (dir_part, name, len - 1); |
|
417 dir_part[len - 1] = 0; |
|
418 path_elt = concat3 (orig_path_elt, "/", dir_part); |
|
419 name = last_slash + 1; |
|
420 } else |
|
421 path_elt = (string) orig_path_elt; |
|
422 |
|
423 /* Don't bother doing any lookups if this `path_elt' isn't covered by |
|
424 any of database directories. We do this not so much because the |
|
425 extra couple of hash lookups matter -- they don't -- but rather |
|
426 because we want to return NULL in this case, so path_search can |
|
427 know to do a disk search. */ |
|
428 for (e = 0; !relevant && e < STR_LIST_LENGTH (db_dir_list); e++) { |
|
429 relevant = elt_in_db (STR_LIST_ELT (db_dir_list, e), path_elt); |
|
430 } |
|
431 if (!relevant) |
|
432 return NULL; |
|
433 |
|
434 /* If we have aliases for this name, use them. */ |
|
435 if (alias_db.buckets) |
|
436 aliases = hash_lookup (alias_db, name); |
|
437 |
|
438 if (!aliases) { |
|
439 aliases = XTALLOC1 (string); |
|
440 aliases[0] = NULL; |
|
441 } |
|
442 { /* Push aliases up by one and insert the original name at the front. */ |
|
443 unsigned i; |
|
444 unsigned len = 1; /* Have NULL element already allocated. */ |
|
445 for (r = aliases; *r; r++) |
|
446 len++; |
|
447 XRETALLOC (aliases, len + 1, string); |
|
448 for (i = len; i > 0; i--) { |
|
449 aliases[i] = aliases[i - 1]; |
|
450 } |
|
451 aliases[0] = (string) name; |
|
452 } |
|
453 |
|
454 done = false; |
|
455 for (r = aliases; !done && *r; r++) { |
|
456 string try = *r; |
|
457 |
|
458 /* We have an ls-R db. Look up `try'. */ |
|
459 orig_dirs = db_dirs = hash_lookup (db, try); |
|
460 |
|
461 ret = XTALLOC1 (str_list_type); |
|
462 *ret = str_list_init (); |
|
463 |
|
464 /* For each filename found, see if it matches the path element. For |
|
465 example, if we have .../cx/cmr10.300pk and .../ricoh/cmr10.300pk, |
|
466 and the path looks like .../cx, we don't want the ricoh file. */ |
|
467 while (!done && db_dirs && *db_dirs) { |
|
468 string db_file = concat (*db_dirs, try); |
|
469 boolean matched = match (db_file, path_elt); |
|
470 |
3172
|
471 #ifdef KPSE_DEBUG |
2999
|
472 if (KPSE_DEBUG_P (KPSE_DEBUG_SEARCH)) |
|
473 DEBUGF3 ("db:match(%s,%s) = %d\n", db_file, path_elt, matched); |
3172
|
474 #endif |
2999
|
475 |
|
476 /* We got a hit in the database. Now see if the file actually |
|
477 exists, possibly under an alias. */ |
|
478 if (matched) { |
|
479 string found = NULL; |
|
480 if (kpse_readable_file (db_file)) { |
|
481 found = db_file; |
|
482 |
|
483 } else { |
|
484 string *a; |
|
485 |
|
486 free (db_file); /* `db_file' wasn't on disk. */ |
|
487 |
|
488 /* The hit in the DB doesn't exist in disk. Now try all its |
|
489 aliases. For example, suppose we have a hierarchy on CD, |
|
490 thus `mf.bas', but ls-R contains `mf.base'. Find it anyway. |
|
491 Could probably work around this with aliases, but |
|
492 this is pretty easy and shouldn't hurt. The upshot is that |
|
493 if one of the aliases actually exists, we use that. */ |
|
494 for (a = aliases + 1; *a && !found; a++) { |
|
495 string atry = concat (*db_dirs, *a); |
|
496 if (kpse_readable_file (atry)) |
|
497 found = atry; |
|
498 else |
|
499 free (atry); |
|
500 } |
|
501 } |
|
502 |
|
503 /* If we have a real file, add it to the list, maybe done. */ |
|
504 if (found) { |
|
505 str_list_add (ret, found); |
|
506 if (!all && found) |
|
507 done = true; |
|
508 } |
|
509 } else { /* no match in the db */ |
|
510 free (db_file); |
|
511 } |
|
512 |
|
513 |
|
514 /* On to the next directory, if any. */ |
|
515 db_dirs++; |
|
516 } |
|
517 |
|
518 /* This is just the space for the pointers, not the strings. */ |
|
519 if (orig_dirs && *orig_dirs) |
|
520 free (orig_dirs); |
|
521 } |
|
522 |
|
523 free (aliases); |
|
524 |
|
525 /* If we had to break up NAME, free the temporary PATH_ELT. */ |
|
526 if (path_elt != orig_path_elt) |
|
527 free (path_elt); |
|
528 |
|
529 return ret; |
|
530 } |