gawk: Internal File Ops
1
1 16.6.2 C Code for 'chdir()' and 'stat()'
1 ----------------------------------------
1
1 Here is the C code for these extensions.(1)
1
1 The file includes a number of standard header files, and then
1 includes the 'gawkapi.h' header file, which provides the API
1 definitions. Those are followed by the necessary variable declarations
1 to make use of the API macros and boilerplate code (⇒ Extension API
1 Boilerplate):
1
1 #ifdef HAVE_CONFIG_H
1 #include <config.h>
1 #endif
1
1 #include <stdio.h>
1 #include <assert.h>
1 #include <errno.h>
1 #include <stdlib.h>
1 #include <string.h>
1 #include <unistd.h>
1
1 #include <sys/types.h>
1 #include <sys/stat.h>
1
1 #include "gawkapi.h"
1
1 #include "gettext.h"
1 #define _(msgid) gettext(msgid)
1 #define N_(msgid) msgid
1
1 #include "gawkfts.h"
1 #include "stack.h"
1
1 static const gawk_api_t *api; /* for convenience macros to work */
1 static awk_ext_id_t ext_id;
1 static awk_bool_t init_filefuncs(void);
1 static awk_bool_t (*init_func)(void) = init_filefuncs;
1 static const char *ext_version = "filefuncs extension: version 1.0";
1
1 int plugin_is_GPL_compatible;
1
1 By convention, for an 'awk' function 'foo()', the C function that
1 implements it is called 'do_foo()'. The function should have three
1 arguments. The first is an 'int', usually called 'nargs', that
1 represents the number of actual arguments for the function. The second
1 is a pointer to an 'awk_value_t' structure, usually named 'result'. The
1 third is a pointer to a 'struct awk_ext_func'; the functions shown here
1 do not use it and so name it 'unused':
1
1 /* do_chdir --- provide dynamically loaded chdir() function for gawk */
1
1 static awk_value_t *
1 do_chdir(int nargs, awk_value_t *result, struct awk_ext_func *unused)
1 {
1 awk_value_t newdir;
1 int ret = -1;
1
1 assert(result != NULL);
1
1 The 'newdir' variable represents the new directory to change to,
1 which is retrieved with 'get_argument()'. Note that the first argument
1 is numbered zero.
1
1 If the argument is retrieved successfully, the function calls the
1 'chdir()' system call. If 'chdir()' fails, the function updates
1 'ERRNO'. If the argument cannot be retrieved, 'ret' keeps its initial
1 value of -1:
1
1 if (get_argument(0, AWK_STRING, & newdir)) {
1 ret = chdir(newdir.str_value.str);
1 if (ret < 0)
1 update_ERRNO_int(errno);
1 }
1
1 Finally, the function returns the return value to the 'awk' level:
1
1 return make_number(ret, result);
1 }
1
1 The 'stat()' extension is more involved. First comes a function that
1 turns a numeric mode into a printable representation (e.g., octal '0644'
1 becomes '-rw-r--r--'). This is omitted here for brevity:
1
1 /* format_mode --- turn a stat mode field into something readable */
1
1 static char *
1 format_mode(unsigned long fmode)
1 {
1 ...
1 }
1
1 Next comes a function for reading symbolic links, which is also
1 omitted here for brevity:
1
1 /* read_symlink --- read a symbolic link into an allocated buffer.
1 ... */
1
1 static char *
1 read_symlink(const char *fname, size_t bufsize, ssize_t *linksize)
1 {
1 ...
1 }
1
1 Two helper functions simplify entering values in the array that will
1 contain the result of the 'stat()':
1
1 /* array_set --- set an array element
1    Stores 'value' in 'array' under the string subscript 'sub'. The
1    subscript is wrapped in a local awk_value_t by make_const_string(),
1    using strlen(sub) as its length. The result of set_array_element()
1    is not checked. */
1
1 static void
1 array_set(awk_array_t array, const char *sub, awk_value_t *value)
1 {
1 awk_value_t index;
1
1 set_array_element(array,
1 make_const_string(sub, strlen(sub), & index),
1 value);
1
1 }
1
1 /* array_set_numeric --- set an array element with a number
1    Converts 'num' with make_number() into a local awk_value_t and
1    passes it to array_set() to be stored in 'array' under the
1    subscript 'sub'. */
1
1 static void
1 array_set_numeric(awk_array_t array, const char *sub, double num)
1 {
1 awk_value_t tmp;
1
1 array_set(array, sub, make_number(num, & tmp));
1 }
1
1 The following function does most of the work to fill in the
1 'awk_array_t' result array with values obtained from a valid 'struct
1 stat'. This work is done in a separate function to support the 'stat()'
1 function for 'gawk' and also to support the 'fts()' extension, which is
1 included in the same file but whose code is not shown here (⇒
1 Extension Sample File Functions).
1
1 The first part of the function is variable declarations, including a
1 table to map file types to strings:
1
1 /* fill_stat_array --- do the work to fill an array with stat info */
1
1 static int
1 fill_stat_array(const char *name, awk_array_t array, struct stat *sbuf)
1 {
1 char *pmode; /* printable mode */
1 const char *type = "unknown";
1 awk_value_t tmp;
1 static struct ftype_map {
1 unsigned int mask;
1 const char *type;
1 } ftype_map[] = {
1 { S_IFREG, "file" },
1 { S_IFBLK, "blockdev" },
1 { S_IFCHR, "chardev" },
1 { S_IFDIR, "directory" },
1 #ifdef S_IFSOCK
1 { S_IFSOCK, "socket" },
1 #endif
1 #ifdef S_IFIFO
1 { S_IFIFO, "fifo" },
1 #endif
1 #ifdef S_IFLNK
1 { S_IFLNK, "symlink" },
1 #endif
1 #ifdef S_IFDOOR /* Solaris weirdness */
1 { S_IFDOOR, "door" },
1 #endif
1 };
1 int j, k;
1
1 The destination array is cleared, and then code fills in various
1 elements based on values in the 'struct stat':
1
1 /* empty out the array */
1 clear_array(array);
1
1 /* fill in the array */
1 array_set(array, "name", make_const_string(name, strlen(name),
1 & tmp));
1 array_set_numeric(array, "dev", sbuf->st_dev);
1 array_set_numeric(array, "ino", sbuf->st_ino);
1 array_set_numeric(array, "mode", sbuf->st_mode);
1 array_set_numeric(array, "nlink", sbuf->st_nlink);
1 array_set_numeric(array, "uid", sbuf->st_uid);
1 array_set_numeric(array, "gid", sbuf->st_gid);
1 array_set_numeric(array, "size", sbuf->st_size);
1 array_set_numeric(array, "blocks", sbuf->st_blocks);
1 array_set_numeric(array, "atime", sbuf->st_atime);
1 array_set_numeric(array, "mtime", sbuf->st_mtime);
1 array_set_numeric(array, "ctime", sbuf->st_ctime);
1
1 /* for block and character devices, add rdev,
1 major and minor numbers */
1 if (S_ISBLK(sbuf->st_mode) || S_ISCHR(sbuf->st_mode)) {
1 array_set_numeric(array, "rdev", sbuf->st_rdev);
1 array_set_numeric(array, "major", major(sbuf->st_rdev));
1 array_set_numeric(array, "minor", minor(sbuf->st_rdev));
1 }
1
1 The latter part of the function makes selective additions to the
1 destination array, depending upon the availability of certain members
1 and/or the type of the file. It then returns zero, for success:
1
1 #ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
1 array_set_numeric(array, "blksize", sbuf->st_blksize);
1 #endif
1
1 pmode = format_mode(sbuf->st_mode);
1 array_set(array, "pmode", make_const_string(pmode, strlen(pmode),
1 & tmp));
1
1 /* for symbolic links, add a linkval field */
1 if (S_ISLNK(sbuf->st_mode)) {
1 char *buf;
1 ssize_t linksize;
1
1 if ((buf = read_symlink(name, sbuf->st_size,
1 & linksize)) != NULL)
1 array_set(array, "linkval",
1 make_malloced_string(buf, linksize, & tmp));
1 else
1 warning(ext_id, _("stat: unable to read symbolic link `%s'"),
1 name);
1 }
1
1 /* add a type field */
1 type = "unknown"; /* shouldn't happen */
1 for (j = 0, k = sizeof(ftype_map)/sizeof(ftype_map[0]); j < k; j++) {
1 if ((sbuf->st_mode & S_IFMT) == ftype_map[j].mask) {
1 type = ftype_map[j].type;
1 break;
1 }
1 }
1
1 array_set(array, "type", make_const_string(type, strlen(type), & tmp));
1
1 return 0;
1 }
1
1 The third argument to 'stat()' was not discussed previously. This
1 argument is optional. If present, it causes 'do_stat()' to use the
1 'stat()' system call instead of the 'lstat()' system call. This is done
1 by using a function pointer: 'statfunc'. 'statfunc' is initialized to
1 point to 'lstat()' (instead of 'stat()') to get the file information, in
1 case the file is a symbolic link. However, if the third argument is
1 included, 'statfunc' is set to point to 'stat()', instead.
1
1 Here is the 'do_stat()' function, which starts with variable
1 declarations and argument checking:
1
1 /* do_stat --- provide a stat() function for gawk */
1
1 static awk_value_t *
1 do_stat(int nargs, awk_value_t *result, struct awk_ext_func *unused)
1 {
1 awk_value_t file_param, array_param;
1 char *name;
1 awk_array_t array;
1 int ret;
1 struct stat sbuf;
1 /* default is lstat() */
1 int (*statfunc)(const char *path, struct stat *sbuf) = lstat;
1
1 assert(result != NULL);
1
1 Then comes the actual work. First, the function gets the arguments.
1 Next, it gets the information for the file. If the called function
1 ('lstat()' or 'stat()') returns an error, the code sets 'ERRNO' and
1 returns:
1
1 /* file is first arg, array to hold results is second */
1 if ( ! get_argument(0, AWK_STRING, & file_param)
1 || ! get_argument(1, AWK_ARRAY, & array_param)) {
1 warning(ext_id, _("stat: bad parameters"));
1 return make_number(-1, result);
1 }
1
1 if (nargs == 3) {
1 statfunc = stat;
1 }
1
1 name = file_param.str_value.str;
1 array = array_param.array_cookie;
1
1 /* always empty out the array */
1 clear_array(array);
1
1 /* stat the file; if error, set ERRNO and return */
1 ret = statfunc(name, & sbuf);
1 if (ret < 0) {
1 update_ERRNO_int(errno);
1 return make_number(ret, result);
1 }
1
1 The tedious work is done by 'fill_stat_array()', shown earlier. When
1 done, the function returns the result from 'fill_stat_array()':
1
1 ret = fill_stat_array(name, array, & sbuf);
1
1 return make_number(ret, result);
1 }
1
1 Finally, it's necessary to provide the "glue" that loads the new
1 function(s) into 'gawk'.
1
1 The 'filefuncs' extension also provides an 'fts()' function, which we
1 omit here (⇒Extension Sample File Functions). For its sake,
1 there is an initialization function:
1
1 /* init_filefuncs --- initialization routine */
1
1 static awk_bool_t
1 init_filefuncs(void)
1 {
1 ...
1 }
1
1 We are almost done. We need an array of 'awk_ext_func_t' structures
1 for loading each function into 'gawk':
1
1 static awk_ext_func_t func_table[] = {
1 { "chdir", do_chdir, 1, 1, awk_false, NULL },
1 { "stat", do_stat, 3, 2, awk_false, NULL },
1 ...
1 };
1
1 Each extension must have a routine named 'dl_load()' to load
1 everything that needs to be loaded. It is simplest to use the
1 'dl_load_func()' macro in 'gawkapi.h':
1
1 /* define the dl_load() function using the boilerplate macro */
1
1 dl_load_func(func_table, filefuncs, "")
1
1 And that's it!
1
1 ---------- Footnotes ----------
1
1 (1) This version is edited slightly for presentation. See
1 'extension/filefuncs.c' in the 'gawk' distribution for the complete
1 version.
1