From 8640b292231bd69b0298e0f7ddc8bffbb570a7c5 Mon Sep 17 00:00:00 2001 From: Ritvik Nayak Date: Wed, 18 Sep 2024 12:20:16 -0700 Subject: [PATCH 1/6] Implement -v and -F flags --- user/grep.c | 66 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/user/grep.c b/user/grep.c index 2315a0c..0be64b2 100644 --- a/user/grep.c +++ b/user/grep.c @@ -7,6 +7,12 @@ char buf[1024]; int match(char*, char*); +/* -F, -R, -v flags */ +/* No bool type, so use uint8 instead */ +uint8 Fflag = 0; +uint8 Rflag = 0; +uint8 vflag = 0; + void grep(char *pattern, int fd) { @@ -14,13 +20,17 @@ grep(char *pattern, int fd) char *p, *q; m = 0; + /* Write continuously to the buffer. If runs out of space, move current + * position to beginning of buffer and read again.*/ while((n = read(fd, buf+m, sizeof(buf)-m-1)) > 0){ m += n; buf[m] = '\0'; p = buf; while((q = strchr(p, '\n')) != 0){ *q = 0; - if(match(pattern, p)){ + /* Print line if match and no -v flag or if no match and -v flag + * Simple xor operation */ + if (match(pattern, p) != vflag) { *q = '\n'; write(1, p, q+1 - p); } @@ -40,23 +50,53 @@ main(int argc, char *argv[]) char *pattern; if(argc <= 1){ - fprintf(2, "usage: grep pattern [file ...]\n"); + fprintf(2, "usage: grep [-F] [-R] [-v] pattern [file ...]\n"); exit(1); } - pattern = argv[1]; - if(argc <= 2){ + /* check for flags */ + i = 1; + while(1) { + if (argv[i][0] == '-') { + switch (argv[i][1]) { + case 'F': + Fflag = 1; + break; + case 'R': + Rflag = 1; + break; + case 'v': + vflag = 1; + break; + default: + fprintf(2, "Unrecognized flag: -%c\n", argv[i][1]); + exit(1); + } + i++; + } else { + break; + } + } + /* Mostly so I don't get unused variable errors*/ + if (Rflag){ + fprintf(1, "Flag found\n"); + } + + pattern = argv[i++]; + + if(argc <= i){ grep(pattern, 0); exit(0); } - for(i = 2; i < argc; i++){ + while(i < argc){ if((fd = open(argv[i], 0)) < 0){ printf("grep: cannot open %s\n", argv[i]); exit(1); } 
grep(pattern, fd); close(fd); + i++; } exit(0); } @@ -71,7 +111,8 @@ int matchstar(int, char*, char*); int match(char *re, char *text) { - if(re[0] == '^') + // Ignore regex stuff when -F is passed + if(re[0] == '^' && !Fflag) return matchhere(re+1, text); do{ // must look at empty string if(matchhere(re, text)) @@ -85,11 +126,14 @@ int matchhere(char *re, char *text) { if(re[0] == '\0') return 1; - if(re[1] == '*') - return matchstar(re[0], re+2, text); - if(re[0] == '$' && re[1] == '\0') - return *text == '\0'; - if(*text!='\0' && (re[0]=='.' || re[0]==*text)) + // Ignore regex stuff when -F is passed + if (!Fflag){ + if(re[1] == '*') + return matchstar(re[0], re+2, text); + if(re[0] == '$' && re[1] == '\0') + return *text == '\0'; + } + if(*text!='\0' && ((!Fflag && re[0]=='.') || re[0]==*text)) return matchhere(re+1, text+1); return 0; } From 455614860d43135ea9fca94c8293d138c7936d05 Mon Sep 17 00:00:00 2001 From: Ritvik Nayak Date: Wed, 18 Sep 2024 13:21:09 -0700 Subject: [PATCH 2/6] Print file name in color --- user/grep.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/user/grep.c b/user/grep.c index 0be64b2..e8d43c7 100644 --- a/user/grep.c +++ b/user/grep.c @@ -13,8 +13,10 @@ uint8 Fflag = 0; uint8 Rflag = 0; uint8 vflag = 0; +uint8 printPath = 0; + void -grep(char *pattern, int fd) +grep(char *pattern, int fd, char *path) { int n, m; char *p, *q; @@ -32,6 +34,8 @@ grep(char *pattern, int fd) * Simple xor operation */ if (match(pattern, p) != vflag) { *q = '\n'; + if (printPath) + fprintf(1, "\x1b[31m%s:\x1b[0m", path); write(1, p, q+1 - p); } p = q+1; @@ -77,24 +81,26 @@ main(int argc, char *argv[]) break; } } - /* Mostly so I don't get unused variable errors*/ - if (Rflag){ - fprintf(1, "Flag found\n"); - } pattern = argv[i++]; if(argc <= i){ - grep(pattern, 0); + printPath = 0; + grep(pattern, 0, "\0"); exit(0); } + // Print path if recursion is used or if multiple files are passed + if(Rflag || i + 1 < argc) { + 
printPath = 1; + } + while(i < argc){ if((fd = open(argv[i], 0)) < 0){ printf("grep: cannot open %s\n", argv[i]); exit(1); } - grep(pattern, fd); + grep(pattern, fd, argv[i]); close(fd); i++; } From 0dc043103e83ad57b3d8f3f54e01e5a44e122d6e Mon Sep 17 00:00:00 2001 From: Ritvik Nayak Date: Thu, 19 Sep 2024 11:24:53 -0700 Subject: [PATCH 3/6] Add -R flag and resursion functionality --- user/grep.c | 98 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 78 insertions(+), 20 deletions(-) diff --git a/user/grep.c b/user/grep.c index e8d43c7..5f804ee 100644 --- a/user/grep.c +++ b/user/grep.c @@ -2,13 +2,15 @@ #include "kernel/types.h" #include "kernel/stat.h" +#include "kernel/fs.h" #include "user/user.h" char buf[1024]; int match(char*, char*); +int dirgrep(char*, int, char*); -/* -F, -R, -v flags */ -/* No bool type, so use uint8 instead */ +// -F, -R, -v flags +// No bool type, so use uint8 instead uint8 Fflag = 0; uint8 Rflag = 0; uint8 vflag = 0; @@ -22,20 +24,20 @@ grep(char *pattern, int fd, char *path) char *p, *q; m = 0; - /* Write continuously to the buffer. If runs out of space, move current - * position to beginning of buffer and read again.*/ + // Write continuously to the buffer. If runs out of space, move current + // position to beginning of buffer and read again. 
while((n = read(fd, buf+m, sizeof(buf)-m-1)) > 0){ m += n; buf[m] = '\0'; p = buf; while((q = strchr(p, '\n')) != 0){ *q = 0; - /* Print line if match and no -v flag or if no match and -v flag - * Simple xor operation */ + // Print line if match and no -v flag or if no match and -v flag + // Simple xor operation if (match(pattern, p) != vflag) { *q = '\n'; - if (printPath) - fprintf(1, "\x1b[31m%s:\x1b[0m", path); + if (printPath) + fprintf(1, "\x1b[31m%s:\x1b[0m", path); write(1, p, q+1 - p); } p = q+1; @@ -58,23 +60,23 @@ main(int argc, char *argv[]) exit(1); } - /* check for flags */ + // check for flags i = 1; while(1) { if (argv[i][0] == '-') { switch (argv[i][1]) { case 'F': - Fflag = 1; - break; + Fflag = 1; + break; case 'R': - Rflag = 1; - break; + Rflag = 1; + break; case 'v': - vflag = 1; - break; - default: - fprintf(2, "Unrecognized flag: -%c\n", argv[i][1]); - exit(1); + vflag = 1; + break; + default: + fprintf(2, "Unrecognized flag: -%c\n", argv[i][1]); + exit(1); } i++; } else { @@ -97,16 +99,72 @@ main(int argc, char *argv[]) while(i < argc){ if((fd = open(argv[i], 0)) < 0){ - printf("grep: cannot open %s\n", argv[i]); + fprintf(2, "grep: cannot open %s\n", argv[i]); exit(1); } - grep(pattern, fd, argv[i]); + + // Recursion for -R + if (Rflag) { + dirgrep(pattern, fd, argv[i]); + } else { + grep(pattern, fd, argv[i]); + } + + // cleanup close(fd); i++; } exit(0); } +// Recursively go through directories +int +dirgrep(char *pattern, int fd, char *path) { + if (fstat(fd, &st) < 0) { + fprintf(2, "grep: cannot stat %s", path); + return 1; + } + + switch (st.type) { + case T_FILE: + grep(pattern, fd, path); + break; + case T_DIR: + char buf[1024], *p; + struct stat st; + struct dirent de; + int child_fd; + + strcpy(buf, path); + p = buf + strlen(buf); + *p++ = '/'; + + while (read(fd, &de, sizeof(de)) == sizeof(de)) { + if(de.inum == 0) + continue; + + // skip . and .. 
+ if (strcmp(de.name, ".") == 0 || strcmp(de.name, "..") == 0) + continue; + + memmove(p, de.name, DIRSIZ); + + // run dirgrep on the new path + if ((child_fd = open(buf, 0)) < 0){ + fprintf(2, "grep: cannot open %s\n", buf); + continue; + } + + dirgrep(pattern, child_fd, buf); + close(child_fd); + } + break; + case default: + continue; + } + return 0; +} + // Regexp matcher from Kernighan & Pike, // The Practice of Programming, Chapter 9, or // https://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html From 2bc149f998632c85a98d08e83ee5bc78236f990b Mon Sep 17 00:00:00 2001 From: Ritvik Nayak Date: Thu, 19 Sep 2024 11:34:38 -0700 Subject: [PATCH 4/6] Clean up code and fix issues --- user/grep.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/user/grep.c b/user/grep.c index 5f804ee..8d6618b 100644 --- a/user/grep.c +++ b/user/grep.c @@ -120,6 +120,8 @@ main(int argc, char *argv[]) // Recursively go through directories int dirgrep(char *pattern, int fd, char *path) { + struct stat st; + if (fstat(fd, &st) < 0) { fprintf(2, "grep: cannot stat %s", path); return 1; @@ -131,7 +133,6 @@ dirgrep(char *pattern, int fd, char *path) { break; case T_DIR: char buf[1024], *p; - struct stat st; struct dirent de; int child_fd; @@ -140,11 +141,8 @@ dirgrep(char *pattern, int fd, char *path) { *p++ = '/'; while (read(fd, &de, sizeof(de)) == sizeof(de)) { - if(de.inum == 0) - continue; - - // skip . and .. 
- if (strcmp(de.name, ".") == 0 || strcmp(de.name, "..") == 0) + // skip if not a proper file or directory + if (de.inum == 0 || strcmp(de.name, ".") == 0 || strcmp(de.name, "..") == 0) continue; memmove(p, de.name, DIRSIZ); @@ -158,9 +156,6 @@ dirgrep(char *pattern, int fd, char *path) { dirgrep(pattern, child_fd, buf); close(child_fd); } - break; - case default: - continue; } return 0; } From c44889519333b62d7214337b1d7da68f36023b2a Mon Sep 17 00:00:00 2001 From: Ritvik Nayak Date: Thu, 26 Sep 2024 14:25:16 -0700 Subject: [PATCH 5/6] Shrink buffer to prevent memory issues Fixes an issue that caused a trap when recursing down at least 3 levels. It was caused by allocating 1024 bytes for a path, despite the max path length being 128. --- user/grep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/user/grep.c b/user/grep.c index 8d6618b..18b50fa 100644 --- a/user/grep.c +++ b/user/grep.c @@ -132,7 +132,7 @@ dirgrep(char *pattern, int fd, char *path) { grep(pattern, fd, path); break; case T_DIR: - char buf[1024], *p; + char buf[128], *p; struct dirent de; int child_fd; From 6cc85808f5c0281d39023ec00c89d28f2c220f0d Mon Sep 17 00:00:00 2001 From: Ritvik Nayak Date: Thu, 26 Sep 2024 17:25:13 -0700 Subject: [PATCH 6/6] Add documentation and exit codes The exit codes aren't going to be too useful in current FogOS, but should prove useful once sh matures. The documentation is in markdown, though it's formatted to be viewed in the terminal. --- docs/grep.md | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++ user/grep.c | 14 ++++++++--- 2 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 docs/grep.md diff --git a/docs/grep.md b/docs/grep.md new file mode 100644 index 0000000..bcb89ad --- /dev/null +++ b/docs/grep.md @@ -0,0 +1,70 @@ +# grep + +## NAME + +grep - print lines that match patterns + +## SYNOPSIS + +``` +grep [-R] [-F] [-v] _pattern_ [file...] 
+``` +## DESCRIPTION + +`grep` searches for the specified pattern in each file. If no file is provided, +`grep` matches against standard input. Patterns are a limited regular +expression, using `^` for the beginning of a line, `$` for the end of a line, +`.` to denote any character, and `\*` for Kleene stars. + +If `-R` or multiple files are specified, then the path name is printed in red +before the matching line. + +## OPTIONS + +`-R` Read all files under each directory recursively. + +`-F` Use a fixed string as the pattern rather than a regular expression. Can + yield slight performance boost. + +`-v` Invert the matching, printing only the lines that _don't_ match + +Any other flag will fail. + +## EXIT STATUS + +`grep` exits with 0 if at least one line matched (or didn't with `-v`), 1 if no +lines match (or all with `-v`), and 2 if an error occurred. + +## BUGS/CONSIDERATIONS + +* Flags _must_ be specified before the pattern, and must be specified + individually. +* 2 is returned if _any_ error occurs, even if there is a match. Generally a + problem when matching against multiple files, and one matches while another + errors. +* Running `grep -R _pattern_` is **not** equivalent to `grep -R _pattern_ .`, + instead being equivalent to `grep _pattern_`. This is a bug. +* Recursion is depth-first, and is ordered by inode creation time rather than + lexically. +* Breaks up lines longer than 1024 characters. Shouldn't affect most people. + +## EXAMPLES + +`$ grep -R pattern dir` + +matches `pattern` against all files in `dir` and its subdirectories (that can +be opened). + +`$ grep -F F.g README.md` + +matches README.md against the fixed string `F.g`. The pattern is not +interpreted as a regular expression, so `Fog` does not match. + +`$ grep -v Fog README.md` + +matches README.md and prints the lines that _don't_ match the pattern `Fog`. 
+ +`$ ls | grep README` + +finds a file in the current directory whose name matches the pattern `README` diff --git a/user/grep.c b/user/grep.c index 18b50fa..428ae2f 100644 --- a/user/grep.c +++ b/user/grep.c @@ -17,6 +17,9 @@ uint8 vflag = 0; uint8 printPath = 0; +// 0 if a match, 1 if not (2 if error) +uint8 exitVal = 1; + void grep(char *pattern, int fd, char *path) { @@ -39,6 +42,7 @@ grep(char *pattern, int fd, char *path) if (printPath) fprintf(1, "\x1b[31m%s:\x1b[0m", path); write(1, p, q+1 - p); + exitVal = 0; } p = q+1; } @@ -57,7 +61,7 @@ main(int argc, char *argv[]) if(argc <= 1){ fprintf(2, "usage: grep [-F] [-R] [-v] pattern [file ...]\n"); - exit(1); + exit(2); } // check for flags @@ -76,7 +80,7 @@ main(int argc, char *argv[]) break; default: fprintf(2, "Unrecognized flag: -%c\n", argv[i][1]); - exit(1); + exit(2); } i++; } else { @@ -89,7 +93,7 @@ main(int argc, char *argv[]) if(argc <= i){ printPath = 0; grep(pattern, 0, "\0"); - exit(0); + exit(exitVal); } // Print path if recursion is used or if multiple files are passed @@ -114,7 +118,7 @@ main(int argc, char *argv[]) close(fd); i++; } - exit(0); + exit(exitVal); } // Recursively go through directories @@ -124,6 +128,7 @@ dirgrep(char *pattern, int fd, char *path) { if (fstat(fd, &st) < 0) { fprintf(2, "grep: cannot stat %s", path); + exitVal = 2; return 1; } @@ -150,6 +155,7 @@ dirgrep(char *pattern, int fd, char *path) { // run dirgrep on the new path if ((child_fd = open(buf, 0)) < 0){ fprintf(2, "grep: cannot open %s\n", buf); + exitVal = 2; continue; }