gawk: Internal File Ops

1 
1 16.6.2 C Code for 'chdir()' and 'stat()'
1 ----------------------------------------
1 
1 Here is the C code for these extensions.(1)
1 
1    The file includes a number of standard header files, and then
1 includes the 'gawkapi.h' header file, which provides the API
1 definitions.  Those are followed by the necessary variable declarations
1 to make use of the API macros and boilerplate code (⇒Extension API
1 Boilerplate):
1 
1      #ifdef HAVE_CONFIG_H
1      #include <config.h>
1      #endif
1 
1      #include <stdio.h>
1      #include <assert.h>
1      #include <errno.h>
1      #include <stdlib.h>
1      #include <string.h>
1      #include <unistd.h>
1 
1      #include <sys/types.h>
1      #include <sys/stat.h>
1 
1      #include "gawkapi.h"
1 
1      #include "gettext.h"
1      #define _(msgid)  gettext(msgid)
1      #define N_(msgid) msgid
1 
1      #include "gawkfts.h"
1      #include "stack.h"
1 
1      static const gawk_api_t *api;    /* for convenience macros to work */
1      static awk_ext_id_t ext_id;
1      static awk_bool_t init_filefuncs(void);
1      static awk_bool_t (*init_func)(void) = init_filefuncs;
1      static const char *ext_version = "filefuncs extension: version 1.0";
1 
1      int plugin_is_GPL_compatible;
1 
1    By convention, for an 'awk' function 'foo()', the C function that
1 implements it is called 'do_foo()'.  The function takes three
1 arguments.  The first is an 'int', usually called 'nargs', that
1 represents the number of actual arguments for the function.  The second
1 is a pointer to an 'awk_value_t' structure, usually named 'result'.
1 The third is a pointer to a 'struct awk_ext_func', which is not used
1 here:
1 
1      /*  do_chdir --- provide dynamically loaded chdir() function for gawk */
1 
1      static awk_value_t *
1      do_chdir(int nargs, awk_value_t *result, struct awk_ext_func *unused)
1      {
1          awk_value_t newdir;
1          int ret = -1;
1 
1          assert(result != NULL);
1 
1    The 'newdir' variable represents the new directory to change to,
1 which is retrieved with 'get_argument()'.  Note that the first argument
1 is numbered zero.
1 
1    If the argument is retrieved successfully, the function calls the
1 'chdir()' system call.  If the 'chdir()' call then fails, it updates
1 'ERRNO'.  If the argument cannot be retrieved, 'ret' keeps its initial
1 value of -1:
1 
1          if (get_argument(0, AWK_STRING, & newdir)) {
1              ret = chdir(newdir.str_value.str);
1              if (ret < 0)
1                  update_ERRNO_int(errno);
1          }
1 
1    Finally, the function returns the return value to the 'awk' level:
1 
1          return make_number(ret, result);
1      }
1 
1    The 'stat()' extension is more involved.  First comes a function that
1 turns a numeric mode into a printable representation (e.g., octal '0644'
1 becomes '-rw-r--r--').  This is omitted here for brevity:
1 
1      /* format_mode --- turn a stat mode field into something readable */
1 
1      static char *
1      format_mode(unsigned long fmode)
1      {
1          ...
1      }
1 
1    Next comes a function for reading symbolic links, which is also
1 omitted here for brevity:
1 
1      /* read_symlink --- read a symbolic link into an allocated buffer.
1         ... */
1 
1      static char *
1      read_symlink(const char *fname, size_t bufsize, ssize_t *linksize)
1      {
1          ...
1      }
1 
1    Two helper functions simplify entering values in the array that will
1 contain the result of the 'stat()':
1 
1      /* array_set --- set an array element */
1 
1      static void
1      array_set(awk_array_t array, const char *sub, awk_value_t *value)
1      {
1          awk_value_t index;
1 
1          set_array_element(array,
1                            make_const_string(sub, strlen(sub), & index),
1                            value);
1 
1      }
1 
1      /* array_set_numeric --- set an array element with a number */
1 
1      static void
1      array_set_numeric(awk_array_t array, const char *sub, double num)
1      {
1          awk_value_t tmp;
1 
1          array_set(array, sub, make_number(num, & tmp));
1      }
1 
1    The following function does most of the work to fill in the
1 'awk_array_t' result array with values obtained from a valid 'struct
1 stat'.  This work is done in a separate function to support the 'stat()'
1 function for 'gawk' and also to support the 'fts()' extension, which is
1 included in the same file but whose code is not shown here
1 (⇒Extension Sample File Functions).
1 
1    The first part of the function is variable declarations, including a
1 table to map file types to strings:
1 
1      /* fill_stat_array --- do the work to fill an array with stat info */
1 
1      static int
1      fill_stat_array(const char *name, awk_array_t array, struct stat *sbuf)
1      {
1          char *pmode;    /* printable mode */
1          const char *type = "unknown";
1          awk_value_t tmp;
1          static struct ftype_map {
1              unsigned int mask;
1              const char *type;
1          } ftype_map[] = {
1              { S_IFREG, "file" },
1              { S_IFBLK, "blockdev" },
1              { S_IFCHR, "chardev" },
1              { S_IFDIR, "directory" },
1      #ifdef S_IFSOCK
1              { S_IFSOCK, "socket" },
1      #endif
1      #ifdef S_IFIFO
1              { S_IFIFO, "fifo" },
1      #endif
1      #ifdef S_IFLNK
1              { S_IFLNK, "symlink" },
1      #endif
1      #ifdef S_IFDOOR /* Solaris weirdness */
1              { S_IFDOOR, "door" },
1      #endif
1          };
1          int j, k;
1 
1    The destination array is cleared, and then code fills in various
1 elements based on values in the 'struct stat':
1 
1          /* empty out the array */
1          clear_array(array);
1 
1          /* fill in the array */
1          array_set(array, "name", make_const_string(name, strlen(name),
1                                                     & tmp));
1          array_set_numeric(array, "dev", sbuf->st_dev);
1          array_set_numeric(array, "ino", sbuf->st_ino);
1          array_set_numeric(array, "mode", sbuf->st_mode);
1          array_set_numeric(array, "nlink", sbuf->st_nlink);
1          array_set_numeric(array, "uid", sbuf->st_uid);
1          array_set_numeric(array, "gid", sbuf->st_gid);
1          array_set_numeric(array, "size", sbuf->st_size);
1          array_set_numeric(array, "blocks", sbuf->st_blocks);
1          array_set_numeric(array, "atime", sbuf->st_atime);
1          array_set_numeric(array, "mtime", sbuf->st_mtime);
1          array_set_numeric(array, "ctime", sbuf->st_ctime);
1 
1          /* for block and character devices, add rdev,
1             major and minor numbers */
1          if (S_ISBLK(sbuf->st_mode) || S_ISCHR(sbuf->st_mode)) {
1              array_set_numeric(array, "rdev", sbuf->st_rdev);
1              array_set_numeric(array, "major", major(sbuf->st_rdev));
1              array_set_numeric(array, "minor", minor(sbuf->st_rdev));
1          }
1 
1 The latter part of the function makes selective additions to the
1 destination array, depending upon the availability of certain members
1 and/or the type of the file.  It then returns zero, for success:
1 
1      #ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
1          array_set_numeric(array, "blksize", sbuf->st_blksize);
1      #endif
1 
1          pmode = format_mode(sbuf->st_mode);
1          array_set(array, "pmode", make_const_string(pmode, strlen(pmode),
1                                                      & tmp));
1 
1          /* for symbolic links, add a linkval field */
1          if (S_ISLNK(sbuf->st_mode)) {
1              char *buf;
1              ssize_t linksize;
1 
1              if ((buf = read_symlink(name, sbuf->st_size,
1                          & linksize)) != NULL)
1                  array_set(array, "linkval",
1                            make_malloced_string(buf, linksize, & tmp));
1              else
1                  warning(ext_id, _("stat: unable to read symbolic link `%s'"),
1                          name);
1          }
1 
1          /* add a type field */
1          type = "unknown";   /* shouldn't happen */
1          for (j = 0, k = sizeof(ftype_map)/sizeof(ftype_map[0]); j < k; j++) {
1              if ((sbuf->st_mode & S_IFMT) == ftype_map[j].mask) {
1                  type = ftype_map[j].type;
1                  break;
1              }
1          }
1 
1          array_set(array, "type", make_const_string(type, strlen(type), & tmp));
1 
1          return 0;
1      }
1 
1    The third argument to 'stat()' was not discussed previously.  This
1 argument is optional.  If present, it causes 'do_stat()' to use the
1 'stat()' system call instead of the 'lstat()' system call.  This is done
1 by using a function pointer: 'statfunc'.  By default, 'statfunc' points
1 to 'lstat()' rather than 'stat()', so that if the file is a symbolic
1 link, the information returned describes the link itself.  However, if
1 the third argument is included (i.e., 'nargs' is three), 'statfunc' is
1 set to point to 'stat()', instead.
1 
1    Here is the 'do_stat()' function, which starts with variable
1 declarations and argument checking:
1 
1      /* do_stat --- provide a stat() function for gawk */
1 
1      static awk_value_t *
1      do_stat(int nargs, awk_value_t *result, struct awk_ext_func *unused)
1      {
1          awk_value_t file_param, array_param;
1          char *name;
1          awk_array_t array;
1          int ret;
1          struct stat sbuf;
1          /* default is lstat() */
1          int (*statfunc)(const char *path, struct stat *sbuf) = lstat;
1 
1          assert(result != NULL);
1 
1    Then comes the actual work.  First, the function gets the arguments.
1 Next, it gets the information for the file.  If the called function
1 ('lstat()' or 'stat()') returns an error, the code sets 'ERRNO' and
1 returns:
1 
1          /* file is first arg, array to hold results is second */
1          if (   ! get_argument(0, AWK_STRING, & file_param)
1              || ! get_argument(1, AWK_ARRAY, & array_param)) {
1              warning(ext_id, _("stat: bad parameters"));
1              return make_number(-1, result);
1          }
1 
1          if (nargs == 3) {
1              statfunc = stat;
1          }
1 
1          name = file_param.str_value.str;
1          array = array_param.array_cookie;
1 
1          /* always empty out the array */
1          clear_array(array);
1 
1          /* stat the file; if error, set ERRNO and return */
1          ret = statfunc(name, & sbuf);
1          if (ret < 0) {
1              update_ERRNO_int(errno);
1              return make_number(ret, result);
1          }
1 
1    The tedious work is done by 'fill_stat_array()', shown earlier.  When
1 done, the function returns the result from 'fill_stat_array()':
1 
1          ret = fill_stat_array(name, array, & sbuf);
1 
1          return make_number(ret, result);
1      }
1 
1    Finally, it's necessary to provide the "glue" that loads the new
1 function(s) into 'gawk'.
1 
1    The 'filefuncs' extension also provides an 'fts()' function, which we
1 omit here (⇒Extension Sample File Functions).  For its sake,
1 there is an initialization function:
1 
1      /* init_filefuncs --- initialization routine */
1 
1      static awk_bool_t
1      init_filefuncs(void)
1      {
1          ...
1      }
1 
1    We are almost done.  We need an array of 'awk_ext_func_t' structures
1 for loading each function into 'gawk':
1 
1      static awk_ext_func_t func_table[] = {
1          { "chdir", do_chdir, 1, 1, awk_false, NULL },
1          { "stat",  do_stat, 3, 2, awk_false, NULL },
1          ...
1      };
1 
1    Each extension must have a routine named 'dl_load()' to load
1 everything that needs to be loaded.  It is simplest to use the
1 'dl_load_func()' macro in 'gawkapi.h':
1 
1      /* define the dl_load() function using the boilerplate macro */
1 
1      dl_load_func(func_table, filefuncs, "")
1 
1    And that's it!
1 
1    ---------- Footnotes ----------
1 
1    (1) This version is edited slightly for presentation.  See
1 'extension/filefuncs.c' in the 'gawk' distribution for the complete
1 version.
1