changeset 27330:9e9a574df1f2

Make fts (in FTS_CWDFD mode) more efficient by caching a few open file descriptors. This also averts a failure on systems with native openat support when a traversed directory lacks "x" access. * lib/fts_.h: Include "i-ring.h" (struct FTS) [fts_fd_ring]: New member. * lib/fts.c (RESTORE_INITIAL_CWD): Also call fd_ring_clear. (FCHDIR): Add parentheses. (fd_ring_check, fd_ring_print) [!FTS_DEBUG]: Define away. (cwd_advance_fd): Add a 3rd parameter. Adjust all callers. When descending, rather than simply closing the previous fts_cwd_fd value, push that file descriptor onto the ring. (same_fd, fd_ring_print, fd_ring_check) [FTS_DEBUG]: New functions. (fts_open): Initialize the new fd_ring member. (fts_close): Clear the ring. (fts_safe_changedir): When possible, use our new fd_ring to skip the diropen and fstat and dev/ino comparison that would normally accompany a virtual `chdir ("..")'. * modules/fts (Depends-on): Add i-ring. * modules/i-ring: New module. * lib/i-ring.c, lib/i-ring.h, lib/i-ring-test.c: New files. * m4/i-ring.m4: New file.
author Jim Meyering <jim@meyering.net>
date Sun, 12 Nov 2006 17:35:38 +0000
parents 30e7bde1d386
children 5220d90b36c9
files ChangeLog lib/fts.c lib/fts_.h lib/i-ring-test.c lib/i-ring.c lib/i-ring.h m4/i-ring.m4 modules/fts modules/i-ring
diffstat 9 files changed, 398 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Sun Nov 12 09:53:16 2006 +0000
+++ b/ChangeLog	Sun Nov 12 17:35:38 2006 +0000
@@ -1,3 +1,28 @@
+2006-11-12  Jim Meyering  <jim@meyering.net>
+
+	Make fts (in FTS_CWDFD mode) more efficient by caching a few open
+	file descriptors.  This also averts a failure on systems with
+	native openat support when a traversed directory lacks "x" access.
+	* lib/fts_.h: Include "i-ring.h"
+	(struct FTS) [fts_fd_ring]: New member.
+	* lib/fts.c (RESTORE_INITIAL_CWD): Also call fd_ring_clear.
+	(FCHDIR): Add parentheses.
+	(fd_ring_check, fd_ring_print) [!FTS_DEBUG]: Define away.
+	(cwd_advance_fd): Add a 3rd parameter.  Adjust all callers.
+	When descending, rather than simply closing the previous
+	fts_cwd_fd value, push that file descriptor onto the ring.
+	(same_fd, fd_ring_print, fd_ring_check) [FTS_DEBUG]: New functions.
+	(fts_open): Initialize the new fd_ring member.
+	(fts_close): Clear the ring.
+	(fts_safe_changedir): When possible, use our new fd_ring to skip
+	the diropen and fstat and dev/ino comparison that would normally
+	accompany a virtual `chdir ("..")'.
+
+	* modules/fts (Depends-on): Add i-ring.
+	* modules/i-ring: New module.
+	* lib/i-ring.c, lib/i-ring.h, lib/i-ring-test.c: New files.
+	* m4/i-ring.m4: New file.
+
 2006-11-12  Ralf Wildenhues  <Ralf.Wildenhues@gmx.de>
 
 	* gnulib-tool (func_create_testdir): Fix replacement of
--- a/lib/fts.c	Sun Nov 12 09:53:16 2006 +0000
+++ b/lib/fts.c	Sun Nov 12 17:35:38 2006 +0000
@@ -74,6 +74,7 @@
 # include "lstat.h"
 # include "openat.h"
 # include "unistd--.h"
+# include "same-inode.h"
 #endif
 
 #include <dirent.h>
@@ -177,17 +178,21 @@
 #define ISSET(opt)	(sp->fts_options & (opt))
 #define SET(opt)	(sp->fts_options |= (opt))
 
-#define RESTORE_INITIAL_CWD(sp)	FCHDIR (sp, (ISSET (FTS_CWDFD) \
-					     ? AT_FDCWD	       \
-					     : sp->fts_rfd))
+/* FIXME: make this a function */
+#define RESTORE_INITIAL_CWD(sp)			\
+  (fd_ring_clear (&((sp)->fts_fd_ring)),	\
+   FCHDIR ((sp), (ISSET (FTS_CWDFD) ? AT_FDCWD : (sp)->fts_rfd)))
 
-#define FCHDIR(sp, fd)	(!ISSET(FTS_NOCHDIR)		    \
-			 && (ISSET(FTS_CWDFD)		    \
-                             ? (cwd_advance_fd (sp, fd), 0) \
-			     : fchdir(fd)))
+/* FIXME: FTS_NOCHDIR is now misnamed.
+   Call it FTS_USE_FULL_RELATIVE_FILE_NAMES instead. */
+#define FCHDIR(sp, fd)					\
+  (!ISSET(FTS_NOCHDIR) && (ISSET(FTS_CWDFD)		\
+			   ? (cwd_advance_fd ((sp), (fd), true), 0) \
+			   : fchdir (fd)))
 
 
 /* fts_build flags */
+/* FIXME: make this an enum */
 #define BCHILD		1		/* fts_children */
 #define BNAMES		2		/* fts_children, names only */
 #define BREAD		3		/* fts_read */
@@ -196,10 +201,13 @@
 # include <inttypes.h>
 # include <stdint.h>
 # include <stdio.h>
+# include "getcwdat.h"
 bool fts_debug = false;
-# define Dprintf(x) do { if (fts_debug) printf x; } while (0)
+# define Dprintf(x) do { if (fts_debug) printf x; } while (false)
 #else
 # define Dprintf(x)
+# define fd_ring_check(x)
+# define fd_ring_print(a, b, c)
 #endif
 
 #define LEAVE_DIR(Fts, Ent, Tag)				\
@@ -207,9 +215,21 @@
     {								\
       Dprintf (("  %s-leaving: %s\n", Tag, (Ent)->fts_path));	\
       leave_dir (Fts, Ent);					\
+      fd_ring_check (Fts);					\
     }								\
   while (false)
 
+static void
+fd_ring_clear (I_ring *fd_ring)
+{
+  while ( ! i_ring_empty (fd_ring))
+    {
+      int fd = i_ring_pop (fd_ring);
+      if (0 <= fd)
+	close (fd);
+    }
+}
+
 /* Overload the fts_statp->st_size member (otherwise unused, when
    fts_info is FTS_NSOK) to indicate whether fts_read should stat
    this entry or not.  */
@@ -244,19 +264,35 @@
   return dirp;
 }
 
-/* Virtual fchdir.  Advance SP's working directory
-   file descriptor, SP->fts_cwd_fd, to FD, and close
-   the previous one, ignoring any error.  */
+/* Virtual fchdir.  Advance SP's working directory file descriptor,
+   SP->fts_cwd_fd, to FD, and push the previous value onto the fd_ring.
+   CHDIR_DOWN_ONE is true if FD corresponds to an entry in the directory
+   open on sp->fts_cwd_fd; i.e., to move the working directory one level
+   down.  */
 static void
 internal_function
-cwd_advance_fd (FTS *sp, int fd)
+cwd_advance_fd (FTS *sp, int fd, bool chdir_down_one)
 {
   int old = sp->fts_cwd_fd;
   if (old == fd && old != AT_FDCWD)
     abort ();
+
+  if (chdir_down_one)
+    {
+      /* Push "old" onto the ring.
+	 If the displaced file descriptor is non-negative, close it.  */
+      int prev_fd_in_slot = i_ring_push (&sp->fts_fd_ring, old);
+      fd_ring_print (sp, stderr, "post-push");
+      if (0 <= prev_fd_in_slot)
+	close (prev_fd_in_slot); /* ignore any close failure */
+    }
+  else if ( ! ISSET (FTS_NOCHDIR))
+    {
+      if (0 <= old)
+	close (old); /* ignore any close failure */
+    }
+
   sp->fts_cwd_fd = fd;
-  if (0 <= old)
-    close (old); /* ignore any close failure */
 }
 
 /* Open the directory DIR if possible, and return a file
@@ -448,6 +484,7 @@
 	    && (sp->fts_rfd = diropen (sp, ".")) < 0)
 		SET(FTS_NOCHDIR);
 
+	i_ring_init (&sp->fts_fd_ring, -1);
 	return (sp);
 
 mem3:	fts_lfree(root);
@@ -521,6 +558,7 @@
 	    close(sp->fts_rfd);
 	  }
 
+	fd_ring_clear (&sp->fts_fd_ring);
 	free_dir (sp);
 
 	/* Free up the stream pointer. */
@@ -850,7 +888,7 @@
 	sp->fts_child = fts_build(sp, instr);
 	if (ISSET(FTS_CWDFD))
 	  {
-	    cwd_advance_fd (sp, fd);
+	    cwd_advance_fd (sp, fd, true);
 	  }
 	else
 	  {
@@ -1228,6 +1266,87 @@
 	}
     }
 }
+
+static bool
+same_fd (int fd1, int fd2)
+{
+  struct stat sb1, sb2;
+  return (fstat (fd1, &sb1) == 0
+	  && fstat (fd2, &sb2) == 0
+	  && SAME_INODE (sb1, sb2));
+}
+
+static void
+fd_ring_print (FTS const *sp, FILE *stream, char const *msg)
+{
+  I_ring const *fd_ring = &sp->fts_fd_ring;
+  unsigned int i = fd_ring->fts_front;
+  char *cwd = getcwdat (sp->fts_cwd_fd, NULL, 0);
+  fprintf (stream, "=== %s ========== %s\n", msg, cwd);
+  free (cwd);
+  if (i_ring_empty (fd_ring))
+    return;
+
+  while (true)
+    {
+      int fd = fd_ring->fts_fd_ring[i];
+      if (fd < 0)
+	fprintf (stream, "%d: %d:\n", i, fd);
+      else
+	{
+	  char *wd = getcwdat (fd, NULL, 0);
+	  fprintf (stream, "%d: %d: %s\n", i, fd, wd);
+	  free (wd);
+	}
+      if (i == fd_ring->fts_back)
+	break;
+      i = (i + I_RING_SIZE - 1) % I_RING_SIZE;
+    }
+}
+
+/* Ensure that each file descriptor on the fd_ring matches a
+   parent, grandparent, etc. of the current working directory.  */
+static void
+fd_ring_check (FTS const *sp)
+{
+  if (!fts_debug)
+    return;
+
+  /* Make a writable copy.  */
+  I_ring fd_w = sp->fts_fd_ring;
+
+  int cwd_fd = sp->fts_cwd_fd;
+  cwd_fd = dup (cwd_fd);
+  char *dot = getcwdat (cwd_fd, NULL, 0);
+  error (0, 0, "===== check ===== cwd: %s", dot);
+  free (dot);
+  while ( ! i_ring_empty (&fd_w))
+    {
+      int fd = i_ring_pop (&fd_w);
+      if (0 <= fd)
+	{
+	  int parent_fd = openat (cwd_fd, "..", O_RDONLY);
+	  if (parent_fd < 0)
+	    {
+	      // Warn?
+	      break;
+	    }
+	  if (!same_fd (fd, parent_fd))
+	    {
+	      char *cwd = getcwdat (fd, NULL, 0);
+	      error (0, errno, "ring  : %s", cwd);
+	      char *c2 = getcwdat (parent_fd, NULL, 0);
+	      error (0, errno, "parent: %s", c2);
+	      free (cwd);
+	      free (c2);
+	      abort ();
+	    }
+	  close (cwd_fd);
+	  cwd_fd = parent_fd;
+	}
+    }
+  close (cwd_fd);
+}
 #endif
 
 static unsigned short int
@@ -1503,28 +1622,51 @@
 fts_safe_changedir (FTS *sp, FTSENT *p, int fd, char const *dir)
 {
 	int ret;
-
-	int newfd = fd;
+	bool is_dotdot = dir && STREQ (dir, "..");
+	int newfd;
 
 	/* This clause handles the unusual case in which FTS_NOCHDIR
 	   is specified, along with FTS_CWDFD.  In that case, there is
 	   no need to change even the virtual cwd file descriptor.
 	   However, if FD is non-negative, we do close it here.  */
-	if (ISSET(FTS_NOCHDIR)) {
-		if (ISSET(FTS_CWDFD) && 0 <= fd)
-			close (fd);
-		return (0);
-	}
-	if (fd < 0 && (newfd = diropen (sp, dir)) < 0)
-		return (-1);
+	if (ISSET (FTS_NOCHDIR))
+	  {
+	    if (ISSET (FTS_CWDFD) && 0 <= fd)
+	      close (fd);
+	    return 0;
+	  }
 
-	/* The following dev/inode check is necessary if we're doing
-	   a `logical' traversal (through symlinks, a la chown -L),
-	   if the system lacks O_NOFOLLOW support, or if we're changing
-	   to "..".  In the latter case, O_NOFOLLOW can't help.  In
-	   general (when the target is not ".."), diropen's use of
-	   O_NOFOLLOW ensures we don't mistakenly follow a symlink,
-	   so we can avoid the expense of this fstat.  */
+	if (fd < 0 && is_dotdot && ISSET (FTS_CWDFD))
+	  {
+	    /* When possible, skip the diropen and subsequent fstat+dev/ino
+	       comparison.  I.e., when changing to parent directory
+	       (chdir ("..")), use a file descriptor from the ring and
+	       save the overhead of diropen+fstat, as well as avoiding
+	       failure when we lack "x" access to the virtual cwd.  */
+	    if ( ! i_ring_empty (&sp->fts_fd_ring))
+	      {
+		fd_ring_print (sp, stderr, "pre-pop");
+		int parent_fd = i_ring_pop (&sp->fts_fd_ring);
+		is_dotdot = true;
+		if (0 <= parent_fd)
+		  {
+		    fd = parent_fd;
+		    dir = NULL;
+		  }
+	      }
+	  }
+
+	newfd = fd;
+	if (fd < 0 && (newfd = diropen (sp, dir)) < 0)
+	  return -1;
+
+	/* The following dev/inode check is necessary if we're doing a
+	   `logical' traversal (through symlinks, a la chown -L), if the
+	   system lacks O_NOFOLLOW support, or if we're changing to ".."
+	   (but not via a popped file descriptor).  When changing to the
+	   name "..", O_NOFOLLOW can't help.  In general, when the target is
+	   not "..", diropen's use of O_NOFOLLOW ensures we don't mistakenly
+	   follow a symlink, so we can avoid the expense of this fstat.  */
 	if (ISSET(FTS_LOGICAL) || ! HAVE_WORKING_O_NOFOLLOW
 	    || (dir && STREQ (dir, "..")))
 	  {
@@ -1545,7 +1687,7 @@
 
 	if (ISSET(FTS_CWDFD))
 	  {
-	    cwd_advance_fd (sp, newfd);
+	    cwd_advance_fd (sp, newfd, ! is_dotdot);
 	    return 0;
 	  }
 
@@ -1557,5 +1699,5 @@
 	    (void)close(newfd);
 	    __set_errno (oerrno);
 	  }
-	return (ret);
+	return ret;
 }
--- a/lib/fts_.h	Sun Nov 12 09:53:16 2006 +0000
+++ b/lib/fts_.h	Sun Nov 12 17:35:38 2006 +0000
@@ -65,6 +65,7 @@
 # include <stddef.h>
 # include <sys/types.h>
 # include <sys/stat.h>
+# include "i-ring.h"
 
 typedef struct {
 	struct _ftsent *fts_cur;	/* current node */
@@ -163,7 +164,12 @@
 		   but it's not appropriate for programs like du.  */
 		struct cycle_check_state *state;
 	} fts_cycle;
+
 # endif
+	/* A stack of the file descriptors corresponding to the
+	   most-recently traversed parent directories.
+	   Currently used only in FTS_CWDFD mode.  */
+	I_ring fts_fd_ring;
 } FTS;
 
 typedef struct _ftsent {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/i-ring-test.c	Sun Nov 12 17:35:38 2006 +0000
@@ -0,0 +1,44 @@
+#include <assert.h>
+#include "i-ring.h"
+/* Test with this:
+   gcc -I. -Wall -W -O i-ring-test.c i-ring.c -L. -lcoreutils && ./a.out
+*/
+
+int
+main ()
+{
+  I_ring ir;
+  i_ring_init (&ir, -1);
+  int o = i_ring_push (&ir, 1);
+  assert (o == -1);
+  o = i_ring_push (&ir, 2);
+  assert (o == -1);
+  o = i_ring_push (&ir, 3);
+  assert (o == -1);
+  o = i_ring_push (&ir, 4);
+  assert (o == -1);
+  o = i_ring_push (&ir, 5);
+  assert (o == 1);
+  o = i_ring_push (&ir, 6);
+  assert (o == 2);
+  o = i_ring_push (&ir, 7);
+  assert (o == 3);
+
+  o = i_ring_pop (&ir);
+  assert (o == 7);
+  o = i_ring_pop (&ir);
+  assert (o == 6);
+  o = i_ring_pop (&ir);
+  assert (o == 5);
+  o = i_ring_pop (&ir);
+  assert (o == 4);
+  assert (i_ring_empty (&ir));
+
+  o = i_ring_push (&ir, 8);
+  assert (o == -1);
+  o = i_ring_pop (&ir);
+  assert (o == 8);
+  assert (i_ring_empty (&ir));
+
+  return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/i-ring.c	Sun Nov 12 17:35:38 2006 +0000
@@ -0,0 +1,69 @@
+/* a simple ring buffer
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+/* written by Jim Meyering */
+
+#include <config.h>
+#include "i-ring.h"
+
+#include <stdlib.h>
+
+void
+i_ring_init (I_ring *ir, int default_val)
+{
+  int i;
+  ir->ir_empty = true;
+  ir->ir_front = 0;
+  ir->ir_back = 0;
+  for (i = 0; i < I_RING_SIZE; i++)
+    ir->ir_data[i] = default_val;
+  ir->ir_default_val = default_val;
+}
+
+bool
+i_ring_empty (I_ring const *ir)
+{
+  return ir->ir_empty;
+}
+
+int
+i_ring_push (I_ring *ir, int val)
+{
+  unsigned int dest_idx = (ir->ir_front + !ir->ir_empty) % I_RING_SIZE;
+  int old_val = ir->ir_data[dest_idx];
+  ir->ir_data[dest_idx] = val;
+  ir->ir_front = dest_idx;
+  if (dest_idx == ir->ir_back)
+    ir->ir_back = (ir->ir_back + !ir->ir_empty) % I_RING_SIZE;
+  ir->ir_empty = false;
+  return old_val;
+}
+
+int
+i_ring_pop (I_ring *ir)
+{
+  int top_val;
+  if (i_ring_empty (ir))
+    abort ();
+  top_val = ir->ir_data[ir->ir_front];
+  ir->ir_data[ir->ir_front] = ir->ir_default_val;
+  if (ir->ir_front == ir->ir_back)
+    ir->ir_empty = true;
+  else
+    ir->ir_front = ((ir->ir_front + I_RING_SIZE - 1) % I_RING_SIZE);
+  return top_val;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/i-ring.h	Sun Nov 12 17:35:38 2006 +0000
@@ -0,0 +1,45 @@
+/* definitions for a simple ring buffer
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+#include <stdbool.h>
+#include "verify.h"
+
+enum { I_RING_SIZE = 4 };
+verify (1 <= I_RING_SIZE);
+
+/* When ir_empty is true, the ring is empty.
+   Otherwise, ir_data[B..F] are defined, where B..F is the contiguous
+   range of indices, modulo I_RING_SIZE, from back to front, inclusive.
+   Undefined elements of ir_data are always set to ir_default_val.
+   Popping from an empty ring aborts.
+   Pushing onto a full ring returns the displaced value.
+   An empty ring has F==B and ir_empty == true.
+   A ring with one entry still has F==B, but now ir_empty == false.  */
+struct I_ring
+{
+  int ir_data[I_RING_SIZE];
+  int ir_default_val;
+  unsigned int ir_front;
+  unsigned int ir_back;
+  bool ir_empty;
+};
+typedef struct I_ring I_ring;
+
+void i_ring_init (I_ring *ir, int ir_default_val);
+int i_ring_push (I_ring *ir, int val);
+int i_ring_pop (I_ring *ir);
+bool i_ring_empty (I_ring const *ir);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/m4/i-ring.m4	Sun Nov 12 17:35:38 2006 +0000
@@ -0,0 +1,10 @@
+# serial 1
+dnl Copyright (C) 2006 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+AC_DEFUN([gl_I_RING],
+[
+  AC_LIBOBJ([i-ring])
+])
--- a/modules/fts	Sun Nov 12 09:53:16 2006 +0000
+++ b/modules/fts	Sun Nov 12 17:35:38 2006 +0000
@@ -14,6 +14,7 @@
 fcntl
 fcntl-safer
 hash
+i-ring
 lstat
 openat
 stdbool
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/modules/i-ring	Sun Nov 12 17:35:38 2006 +0000
@@ -0,0 +1,23 @@
+Description:
+a simple ring buffer
+
+Files:
+lib/i-ring.h
+lib/i-ring.c
+m4/i-ring.m4
+
+Depends-on:
+
+configure.ac:
+gl_I_RING
+
+Makefile.am:
+
+Include:
+"i-ring.h"
+
+License:
+GPL
+
+Maintainer:
+Jim Meyering