diff --git a/.gitignore b/.gitignore index 71b86ec..9669d8f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ # Prerequisites *.d + +*.txt # Object files *.o *.ko @@ -57,8 +59,11 @@ dkms.conf obj/ bin/ __pycache__/ +.vscode/ # End of https://www.toptal.com/developers/gitignore/api/c *.db .vscode/c_cpp_properties.json -.pytest_cache \ No newline at end of file +.pytest_cache + +Database/ diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..696bf24 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,29 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Debug C Project", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/bin/db-project", + "args": [], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "build project", + "miDebuggerPath": "/usr/bin/gdb" + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 71cb29b..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "files.associations": { - "table.h": "c" - } -} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..504f901 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,20 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "build project", + "type": "shell", + "command": "make", + "args": [ + "DEBUG=1" + ], + "group": { + "kind": "build", + "isDefault": true + }, + "problemMatcher": [ + "$gcc" + ] + } + ] +} \ No newline at end of file diff --git 
a/Documentation/btree_structure.jpg b/Documentation/btree_structure.jpg new file mode 100755 index 0000000..2e07e03 Binary files /dev/null and b/Documentation/btree_structure.jpg differ diff --git a/Documentation/build_your_own_database.tex b/Documentation/build_your_own_database.tex new file mode 100644 index 0000000..ffd502b --- /dev/null +++ b/Documentation/build_your_own_database.tex @@ -0,0 +1,1339 @@ +\documentclass[12pt,a4paper]{article} % Changed from report to article +\setlength{\headheight}{25pt} +% Essential packages for professional documentation +% Add these packages before \begin{document} +\usepackage{pgfplots} % For axis environment and plots +\usepackage{colortbl} % For \rowcolor and \arrayrulecolor +\usepackage{pgfplotstable} % For advanced table formatting + +% Use natbib instead of biblatex if biblatex isn't available +\usepackage[numbers]{natbib} +\usepackage{graphicx} +\usepackage{listings} +\usepackage{xcolor} +\usepackage{hyperref} +\usepackage{float} % Required for the [H] float specifier +\usepackage{tikz} +\usepackage{amsmath} +\usepackage{url} +\usepackage{booktabs} +\usepackage{enumitem} +\usepackage[top=1in,bottom=1in,left=1.25in,right=1.25in]{geometry} % Better margins +\usepackage{mathptmx} % Times New Roman font +\usepackage{dirtree} +\usepackage{fancyhdr} % For better headers and footers +\usepackage{titlesec} % For better section formatting +\usepackage{setspace} % For line spacing control +\usepackage{array} % For better table formatting +\usepackage{tcolorbox} % For highlighted text boxes +\usepackage{mdframed} % For framed text +\usepackage{lipsum} % For sample text +\usepackage{microtype} % For better typography +\usepackage{caption} % For better figure captions +\usepackage{subcaption} % For subfigures +\usepackage{pdfpages} % For including external PDF documents +\usepackage{rotating} % For rotated tables and figures +\usepackage{tabularx} % For better tables + +% Better bibliography management +% 
\usepackage[backend=biber,style=ieee,sorting=none]{biblatex} + +% Color definitions for a professional color scheme +\definecolor{uetblue}{RGB}{0, 74, 128} +\definecolor{lightgray}{RGB}{245, 245, 245} +\definecolor{darkgray}{RGB}{64, 64, 64} +\definecolor{accent}{RGB}{185, 35, 45} + +% Set up fancy headers and footers with custom colors +\pagestyle{fancy} +\fancyhf{} % Clear all header and footer fields +\fancyhead[L]{\textcolor{darkgray}{\slshape Build Your Own Database}} +\fancyhead[R]{\textcolor{darkgray}{\slshape\nouppercase{\leftmark}}} +\fancyfoot[C]{\textcolor{darkgray}{\thepage}} +\renewcommand{\headrulewidth}{0.4pt} +\renewcommand{\footrulewidth}{0.4pt} +\renewcommand{\headrule}{\hbox to\headwidth{\color{uetblue}\leaders\hrule height \headrulewidth\hfill}} +\renewcommand{\footrule}{\hbox to\headwidth{\color{uetblue}\leaders\hrule height \footrulewidth\hfill}} + +% Change section formatting to section formatting +\titleformat{\section} + {\normalfont\LARGE\bfseries\color{uetblue}} + {\thesection} + {1em} + {} +\titlespacing*{\section}{0pt}{20pt}{15pt} + +\titleformat{\subsection} + {\normalfont\Large\bfseries\color{uetblue}} + {\thesubsection} + {1em} + {} +\titlespacing*{\subsection}{0pt}{15pt}{10pt} + +\titleformat{\subsubsection} + {\normalfont\large\bfseries\color{darkgray}} + {\thesubsubsection} + {1em} + {} +\titlespacing*{\subsubsection}{0pt}{10pt}{8pt} + +% Enhanced caption setup for professional look +\captionsetup{font=small,labelfont={bf,color=uetblue}} + +% Line spacing - slightly more professional than onehalfspacing +\setstretch{1.15} + +% Custom environment for boxed highlights +\newenvironment{highlight}{% + \begin{tcolorbox}[ + colback=lightgray, + colframe=uetblue, + boxrule=0.5pt, + arc=2mm, + beforeafter skip=12pt, + width=\textwidth, + enlarge left by=-2mm, + enlarge right by=-2mm + ] +}{% + \end{tcolorbox} +} + +% Code listing style with professional colors +\definecolor{codegreen}{rgb}{0,0.6,0} 
+\definecolor{codegray}{rgb}{0.5,0.5,0.5} +\definecolor{codepurple}{rgb}{0.58,0,0.82} +\definecolor{backcolour}{rgb}{0.98,0.98,0.98} + +\lstdefinestyle{mystyle}{ + backgroundcolor=\color{backcolour}, + commentstyle=\color{codegreen}, + keywordstyle=\color{uetblue}, + numberstyle=\tiny\color{codegray}, + stringstyle=\color{codepurple}, + basicstyle=\ttfamily\footnotesize, + breakatwhitespace=false, + breaklines=true, + captionpos=b, + keepspaces=true, + numbers=left, + numbersep=10pt, + showspaces=false, + showstringspaces=false, + showtabs=false, + tabsize=2, + frame=single, + framesep=5pt, + framerule=0pt, + xleftmargin=15pt, + framexleftmargin=15pt, + framexrightmargin=5pt, + framexbottommargin=5pt, + framextopmargin=5pt +} + +\lstset{style=mystyle} + +% Better PDF metadata and hyperlinks +\hypersetup{ + colorlinks=true, + linkcolor=uetblue, + filecolor=accent, + urlcolor=uetblue, + citecolor=accent, + pdftitle={Build Your Own Database: A SQLite-Inspired Database Management System}, + pdfauthor={Hamid Riaz, Sher Muhammad, Ahmad Butt, Abdul Rehman}, + pdfkeywords={Database, B-Tree, SQL, Database Management System, SQLite}, + pdfsubject={Advanced Database Management System Project}, + pdfcreator={LaTeX}, + pdfproducer={LaTeX with hyperref}, + pdfborder={0 0 0}, + pdfpagemode=UseOutlines, + bookmarksopen=true +} + +% Professional title page design +\title{% + + \begin{center} + \includegraphics[width=3cm]{images/uet_logo.png}\\[0.5cm] + \rule{\linewidth}{2pt}\\[0.4cm] + {\huge\bfseries\textcolor{uetblue}{Build Your Own Database}}\\[0.3cm] + {\Large A SQLite-Inspired Database Management System}\\[0.5cm] + \vspace{0.3cm} + \rule{\linewidth}{1.2pt}\\[0.3cm] + {\large Final Project Report}\\ + {\large Advanced Database Management System}\\ + \vspace{0.5cm} + \rule{\linewidth}{2pt}\\ + \end{center} +} + +\author{% + \begin{tabular}{c} + \large\textbf{Team Members} \\[0.4cm] + \begin{tabular}{rc} + \textbf{Hamid Riaz} & 2023-CS-10 \\[0.3cm] + \textbf{Sher Muhammad} & 
2023-CS-15 \\[0.3cm] + \textbf{Ahmad Butt} & 2023-CS-18 \\[0.3cm] + \textbf{Abdul Rehman} & 2023-CS-20 + \end{tabular} + \end{tabular} +} +\date{} +\begin{document} + +% Remove page numbering from title page and front matter + + +\begin{titlepage} + \maketitle + + \begin{center} + \large\textbf{Supervisor}\\ + \large{Sir Atif} + \vspace{0.3cm} + + % University logo could go here + + \Large \textbf{University of Engineering and Technology}\\ + \large \textbf{Lahore, Pakistan} + \end{center} + + \vfill + + \begin{abstract} + \begin{center} + \Large\textbf{Abstract} + \end{center} + \vspace{0.3cm} + \noindent This report presents the design and implementation of a sophisticated, command-line database management system inspired by SQLite. The system addresses fundamental challenges in file-based storage through B-tree indexing, dynamic data structures, and efficient memory management. The implementation demonstrates core database concepts including data organization, query processing, and persistence mechanisms, all while maintaining a modular architecture for extensibility and maintenance. Performance analysis confirms logarithmic time complexity for key operations, representing a significant improvement over traditional file-based approaches. 
+ \end{abstract} + + \vfill + \begin{figure}[h] + \centering + \includegraphics[width=0.75\textwidth]{images/database.png} + \caption*{\textit{Visual representation of the database system architecture}} + \end{figure} +\end{titlepage} + +% After titlepage +% Roman numerals for front matter +\pagenumbering{roman} + +% Dedication page (optional) +\begin{center} + \vspace*{5cm} + \large\textit{Dedicated to our teachers and mentors\\ + who supported us throughout this project.} + \vspace{2cm} +\end{center} + + +% Acknowledgments - change from section* to section* +\section*{Acknowledgments} +\addcontentsline{toc}{section}{Acknowledgments} + +We would like to express our profound gratitude to our supervisor, Sir Atif, for his invaluable guidance, expertise, and continuous support throughout the development of this project. His insights and feedback significantly enhanced the quality of our work. + +We also extend our appreciation to the Department of Computer Science at the University of Engineering and Technology, Lahore, for providing the necessary resources and creating an environment conducive to research and learning. + +Our sincere thanks to our peers who participated in testing the system and providing constructive feedback that helped refine our implementation. + +Finally, we acknowledge the developers of SQLite whose open architecture served as inspiration for this educational project. 
+ +\clearpage + +% Table of contents +\renewcommand{\contentsname}{\textcolor{uetblue}{\large\bfseries Contents}} +\tableofcontents + +% Lists +\renewcommand{\listfigurename}{\textcolor{uetblue}{\large\bfseries List of Figures}} +\listoffigures +\renewcommand{\listtablename}{\textcolor{uetblue}{\large\bfseries List of Tables}} +\listoftables +\clearpage + + +% Executive Summary - change from section* to section* +\section*{Executive Summary} +\addcontentsline{toc}{section}{Executive Summary} + +The ``Build Your Own Database'' project implements a lightweight yet powerful database management system inspired by SQLite. This educational project demonstrates the fundamental principles of database systems by constructing a functional DBMS from first principles. + +\begin{highlight} +\textbf{Key Accomplishments:} +\begin{itemize} + \item Implemented a B+ Tree index structure that enables logarithmic-time data access + \item Developed a pager system for efficient disk I/O and memory management + \item Created a SQL-like command interface for intuitive data manipulation + \item Designed a catalog system for metadata management across multiple databases +\end{itemize} +\end{highlight} + +The system architecture follows a modular approach with clear separation of concerns, facilitating extensibility and maintenance. Performance analysis confirms that our implementation achieves O(log n) complexity for key operations, representing a significant improvement over traditional file-based systems. + +This report describes the design decisions, implementation details, and evaluation results of our database system. Through this project, we have gained practical experience with core database concepts including indexing structures, buffer management, query processing, and persistent storage. 
+ +% Start regular page numbering for main content +\clearpage +\pagenumbering{arabic} + +\section{Introduction} + +\section{Project Overview} +Contemporary data-driven applications fundamentally rely on efficient database systems that provide robust mechanisms for storing, retrieving, and manipulating structured information. Traditional file-based storage systems present numerous limitations, including significant performance bottlenecks, absence of indexing capabilities, and inadequate support for concurrent access. This project implements a sophisticated database engine that addresses these limitations through purpose-built data structures and algorithms. + +This database engine has been meticulously constructed from first principles, implementing fundamental database functionality without dependency on existing database systems. By emphasizing core concepts such as B+ Tree indexing, pagination, and command processing, the project offers comprehensive insights into the operational principles of modern database management systems. + +\begin{figure}[h] +\centering +\includegraphics[width=0.8\textwidth]{images/database-engine-architecture.png} +\caption{High-Level Architecture of the Database Engine}\label{fig:btree_example} +\end{figure} + +\section{Challenges in Traditional File-Based Storage} + +File-based storage systems encounter numerous limitations that significantly impact their efficiency and practical utility in data management applications: + +\begin{mdframed}[linecolor=uetblue, linewidth=1pt, backgroundcolor=lightgray, roundcorner=10pt, innerleftmargin=10pt, innerrightmargin=10pt] +\begin{itemize} + \item \textbf{Sequential Access Constraints}: File systems are inherently optimized for sequential access patterns, resulting in suboptimal performance characteristics when executing random lookups across large datasets. 
+ + \item \textbf{Absence of Indexing Mechanisms}: File systems lack native support for indexing structures, necessitating complete dataset scans for record retrieval operations. + + \item \textbf{Data Redundancy Issues}: In the absence of structured schema definitions, data duplication frequently occurs, leading to potential inconsistencies in the stored information. + + \item \textbf{Limited Query Capabilities}: Traditional file systems provide minimal mechanisms for filtering and selecting specific data subsets based on conditional criteria. + + \item \textbf{Concurrency Control Deficiencies}: Multiple processes accessing shared files simultaneously can result in data corruption without proper locking and transaction management mechanisms. + + \item \textbf{Schema Evolution Complexities}: Modifying the structure of data stored in file-based systems typically requires comprehensive reformatting of all existing records. + + \item \textbf{Transaction Support Limitations}: File systems lack atomic operation guarantees necessary for ensuring data consistency across related modifications. +\end{itemize} +\end{mdframed} + +Our database implementation systematically addresses these challenges through specialized data structures, sophisticated memory management techniques, and a structured approach to data storage and retrieval operations. 
+ +\section{Project Objectives} + +The primary objectives of this database implementation include: + +\begin{itemize} + \item Developing an efficient B+ Tree based indexing system for fast data retrieval + \item Implementing a robust paging mechanism to manage memory and disk I/O + \item Creating a SQL-like command interface for data manipulation + \item Supporting multiple data types for flexibility in data storage + \item Enabling multi-database and multi-table operations + \item Providing persistence with efficient file handling + \item Maintaining a clear separation of concerns through modular design +\end{itemize} + +\section{Key Features} + +The database system implements several key features that address the limitations of file-based storage: + +\begin{itemize} + \item \textbf{B+ Tree Indexing}: Enables O(log n) data lookups instead of linear scans + \item \textbf{Paged Storage}: Manages data in fixed-size blocks for efficient I/O + \item \textbf{Dynamic Schema}: Supports tables with varying column types and sizes + \item \textbf{Multiple Data Types}: Handles integers, strings, floats, booleans, dates, etc. + \item \textbf{Multi-Database Support}: Manages multiple independent databases + \item \textbf{SQL-Like Interface}: Provides familiar commands for data manipulation + \item \textbf{Catalog Management}: Maintains metadata about databases and tables + \item \textbf{Meta-Commands}: Offers system utilities like viewing tree structure +\end{itemize} + +\section{System Architecture} + +\subsection{Architectural Overview} + +The database system implements a comprehensive layered architecture that enforces strict separation of concerns and promotes modularity across all components. Each layer has well-defined responsibilities and communicates with adjacent layers through clean interfaces, enhancing maintainability and extensibility. 
+ +\begin{figure}[H] +\centering +\begin{tikzpicture}[ + node distance=1.5cm, + block/.style={draw, rectangle, rounded corners, minimum width=4cm, minimum height=1cm, fill=uetblue!10, text width=3.8cm, align=center, font=\small\bfseries}, + arrow/.style={thick,->,>=stealth} +] +\node[block] (user) {User Interface Layer}; +\node[block, below of=user] (parser) {Command Processing Layer}; +\node[block, below of=parser] (executor) {Statement Execution Layer}; +\node[block, below of=executor] (btree) {B+ Tree Management Layer}; +\node[block, below of=btree] (pager) {Memory \& I/O Management Layer}; +\node[block, below of=pager] (storage) {Persistent Storage Layer}; + +\draw[arrow] (user) -- (parser) node[midway, right, font=\footnotesize] {SQL Commands}; +\draw[arrow] (parser) -- (executor) node[midway, right, font=\footnotesize] {Parsed Statements}; +\draw[arrow] (executor) -- (btree) node[midway, right, font=\footnotesize] {Data Operations}; +\draw[arrow] (btree) -- (pager) node[midway, right, font=\footnotesize] {Page Requests}; +\draw[arrow] (pager) -- (storage) node[midway, right, font=\footnotesize] {File Operations}; +\end{tikzpicture} +\caption{Layered System Architecture with Interface Definitions} +\label{fig:architecture_detailed} +\end{figure} + +\subsection{Layer Responsibilities} + +\begin{enumerate} + \item \textbf{User Interface Layer:} Manages interaction with users through the command-line interface, presenting query results and system messages in a user-friendly format. + + \item \textbf{Command Processing Layer:} Implements a recursive descent parser that transforms SQL-like textual commands into structured statement representations. Includes lexical analysis, syntax validation, and semantic checking. + + \item \textbf{Statement Execution Layer:} Orchestrates the execution of parsed statements by coordinating actions across lower layers. Implements the logical operations required for each command type. 
+ + \item \textbf{B+ Tree Management Layer:} Provides the core indexing and data organization capabilities through B+ Tree implementation. Handles all tree operations including searches, insertions, deletions, and maintenance of balanced structure. + + \item \textbf{Memory \& I/O Management Layer:} Implements the pager system that mediates between memory and disk storage. Manages the page cache, handles data serialization/deserialization, and implements the buffer pool. + + \item \textbf{Persistent Storage Layer:} Interfaces directly with the file system, implementing durable storage through a structured file format and directory hierarchy. +\end{enumerate} + +\subsection{Component Interactions} + +The system components interact through carefully defined interfaces that facilitate modular development and testing: + +\begin{mdframed}[linecolor=uetblue, linewidth=1pt, backgroundcolor=lightgray, roundcorner=10pt, innerleftmargin=10pt, innerrightmargin=10pt] +\begin{itemize} + \item \textbf{Input Handling $\rightarrow$ Command Processor:} User commands are sent to the parser for syntactic and semantic analysis. + + \item \textbf{Command Processor $\rightarrow$ Statement Executor:} Parsed statement structures are passed to the executor for execution. + + \item \textbf{Statement Executor $\rightarrow$ B+ Tree Manager:} Logical operations are translated to B+ Tree operations such as search, insert, and delete. + + \item \textbf{B+ Tree Manager $\rightarrow$ Pager:} Tree operations request specific pages from the paging system. + + \item \textbf{Pager $\rightarrow$ File Manager:} Page requests that cannot be satisfied from cache trigger file I/O operations. 
+\end{itemize} +\end{mdframed} + +\subsection{Data Flow Architecture} + +Data flows through the system in a bidirectional manner: + +\begin{figure}[H] +\centering +\includegraphics[width=0.8\textwidth]{images/dbflow_actual.png} +\caption{System Data Flow Diagram} +\label{fig:data_flow} +\end{figure} + +This architecture facilitates efficient data movement while maintaining clear separation between user-facing components and internal storage mechanisms. + +\section{Core Components} + +\subsection{Command Processor} +The command processor is responsible for parsing SQL-like commands and converting them into structured statements that the execution engine can process. This component implements a simple recursive descent parser that handles various SQL commands including: + +\begin{itemize} + \item Data Definition Language (DDL): CREATE TABLE, USE TABLE + \item Data Manipulation Language (DML): INSERT, SELECT, UPDATE, DELETE + \item Database Management: CREATE DATABASE, USE DATABASE + \item Meta-Commands: .btree, .constants, .exit +\end{itemize} + +The parser extracts relevant information such as table names, column definitions, and filter conditions from the input commands. This information is stored in statement structures that guide subsequent execution. + +\subsection{B+ Tree Implementation} +The B+ Tree is the core data structure for efficient indexing and storage. It addresses the sequential access problem of traditional files by enabling logarithmic-time lookups, insertions, and deletions. 
Unlike standard binary trees, the B+ Tree structure offers several advantages: + +\begin{itemize} + \item \textbf{Tree Structure}: A balanced tree with internal nodes containing only keys and leaf nodes containing both keys and data + \item \textbf{Linked Leaf Nodes}: All leaf nodes are linked together, facilitating efficient sequential access and range queries + \item \textbf{High Fanout}: Each node can contain multiple keys, reducing tree height and minimizing disk access + \item \textbf{Dynamic Node Management}: Nodes split when they become full, maintaining optimal tree balance + \item \textbf{Key-Based Organization}: Records are organized by primary key for efficient lookup +\end{itemize} + +\begin{figure}[H] +\centering +\includegraphics[width=0.7\textwidth]{images/btree_structure.jpg} +\caption{B+ Tree Structure Showing Internal Routing Nodes and Linked Leaf Nodes Containing Data} +\label{fig:btree_structure} +\end{figure} + +% Add this to emphasize B+ Tree advantages over standard B-Trees +\begin{highlight} +\textbf{B+ Tree vs. Standard B-Tree:} Our implementation uses B+ Trees rather than standard B-Trees for several important reasons: +\begin{itemize} + \item All data records are stored exclusively in leaf nodes, making internal nodes more compact + \item The linked list connection between leaf nodes enables efficient sequential traversal + \item Range queries are significantly more efficient due to the leaf node connections + \item More keys can fit in internal nodes, reducing tree height and improving performance +\end{itemize} +\end{highlight} + +\subsection{Pager} +The pager serves as a buffer management system, mediating between memory and disk. 
It addresses I/O inefficiency by: + +\begin{itemize} + \item \textbf{Page Caching}: Frequently accessed pages remain in memory + \item \textbf{Lazy Loading}: Pages are loaded from disk only when needed + \item \textbf{Dirty Page Tracking}: Only modified pages are written back to disk + \item \textbf{Fixed-Size Pages}: All I/O operations work with consistent page sizes +\end{itemize} + +This component significantly improves performance by reducing disk I/O operations and managing memory efficiently. + +\subsection{Dynamic Row Management} +The dynamic row system addresses the inflexibility of traditional fixed-record storage by: + +\begin{itemize} + \item \textbf{Variable-Sized Records}: Records can have different sizes based on content + \item \textbf{Type-Specific Storage}: Different data types are handled appropriately + \item \textbf{Memory Optimization}: Only the necessary space is allocated for each record + \item \textbf{Serialization/Deserialization}: Efficient conversion between in-memory and disk formats +\end{itemize} + +\subsection{Catalog Management} +The catalog system maintains metadata about the database structure, addressing schema management challenges through: + +\begin{itemize} + \item \textbf{Table Definitions}: Storing column names, types, and sizes + \item \textbf{Database Organization}: Managing multiple tables within databases + \item \textbf{Persistence}: Saving metadata to disk for durability + \item \textbf{Schema Evolution}: Supporting changes to database structure +\end{itemize} + +\section{Implementation Logic} + +Our database implementation follows a comprehensive strategy that balances theoretical principles with practical performance considerations. The following sections detail the specific implementation approaches for the core components of our system. 
+ +\subsection{B+ Tree Implementation} + +The B+ tree data structure is the cornerstone of our database implementation, providing efficient data access with O(log n) complexity: + +\begin{lstlisting}[language=C] +void leaf_node_insert(Cursor *cursor, uint32_t key, DynamicRow *row, TableDef *table_def) +{ + void *node = get_page(cursor->table->pager, cursor->page_num); + uint32_t num_cells = *leaf_node_num_cells(node); + uint32_t value_size = row->data_size; + uint32_t cell_size = LEAF_NODE_CELL_HEADER_SIZE + value_size; + + /* Check space, then insert at cursor position */ +} +\end{lstlisting} + +We implement B+ trees rather than standard B-Trees for three critical reasons: +\begin{enumerate} + \item \textbf{Data-Leaf Separation}: All actual data records are stored exclusively in leaf nodes, making internal nodes more compact and allowing higher fanout + \item \textbf{Linked Leaves}: Leaf nodes are chained together through next-leaf pointers, enabling efficient range queries and sequential scans + \item \textbf{Memory Efficiency}: The separation of routing (internal nodes) and storage (leaf nodes) optimizes the balance between memory usage and access speed +\end{enumerate} + +\subsection{Dynamic Memory Management} + +The pager component mediates between disk and memory through a carefully designed buffering system: + +\begin{lstlisting}[language=C] +void *get_page(Pager *pager, uint32_t page_num) +{ + if (pager->pages[page_num] == NULL) + { + /* Lazy loading: allocate memory and load from disk only when needed */ + } + return pager->pages[page_num]; +} +\end{lstlisting} + +This implementation provides several performance advantages: +\begin{enumerate} + \item Uses \textbf{lazy loading} to avoid unnecessary I/O operations + \item Maintains a cache of recently accessed pages to improve performance + \item Tracks dirty pages and flushes only modified data, minimizing disk writes +\end{enumerate} + +\subsection{Dynamic Row Management} + +We implement variable-size records rather than fixed-size 
rows to accommodate different data types efficiently: + +\begin{lstlisting}[language=C] +void dynamic_row_init(DynamicRow* row, TableDef* table_def) { + /* Calculate total size based on schema */ + uint32_t size = 0; + for (uint32_t i = 0; i < table_def->num_columns; i++) { + /* Add space for each column based on its type */ + } + row->data = malloc(size); + row->data_size = size; +} +\end{lstlisting} + +This approach offers significant advantages: +\begin{enumerate} + \item Calculates exact memory requirements based on the table schema + \item Allocates only the necessary space for each record + \item Provides type-specific accessors for data manipulation + \item Handles serialization for disk storage and deserialization for memory access +\end{enumerate} + +\subsection{Command Processing Logic} + +The command processor implements a recursive descent parser to handle SQL-like commands: + +\begin{lstlisting}[language=C] +PrepareResult prepare_statement(Input_Buffer *buf, Statement *statement) +{ + if (strncasecmp(buf->buffer, "insert", 6) == 0) { + return prepare_insert(buf, statement); + } + else if (strncasecmp(buf->buffer, "select", 6) == 0) { + /* Handle SELECT command */ + } + /* Handle other command types */ +} +\end{lstlisting} + +Key aspects of this implementation include: +\begin{enumerate} + \item Parsing commands based on SQL syntax conventions + \item Validating parameters and constraints before execution + \item Building structured representation of commands for the execution engine + \item Providing clear error messages for syntax issues +\end{enumerate} + +\subsection{Multi-Database Support} + +The database component manages multiple independent databases within the file system: + +\begin{lstlisting}[language=C] +Database* db_create_database(const char* name) { + /* Create directory structure */ + char database_dir[512]; + snprintf(database_dir, sizeof(database_dir), "Database/%s", name); + + /* Create Tables subdirectory */ + char tables_dir[512]; + 
snprintf(tables_dir, sizeof(tables_dir), "%s/Tables", database_dir); +} +\end{lstlisting} + +This organizational approach provides: +\begin{enumerate} + \item A hierarchical directory structure for each database + \item A separate catalog file for each database's metadata + \item Isolation of tables into separate files for modularity and concurrent access + \item Clean switching between databases without restart +\end{enumerate} + +\subsection{B+ Tree Node Management} + +Nodes in the B+ Tree use a specialized structure to enable efficient operations: + +\begin{lstlisting}[language=C] +void initialize_leaf_node(void *node) +{ + set_node_type(node, NODE_LEAF); + set_node_root(node, false); + *leaf_node_num_cells(node) = 0; + *leaf_node_next_leaf(node) = 0; // 0 means no sibling +} +\end{lstlisting} + +The key design aspects include: +\begin{enumerate} + \item \textbf{Common Header}: All nodes share a common header (type, root flag, parent pointer) + \item \textbf{Specialized Node Types}: Leaf nodes store data, internal nodes store routing information + \item \textbf{Linked Leaves}: Next-leaf pointers connect leaf nodes for sequential scanning + \item \textbf{Variable-Sized Cells}: Each node can store multiple records with variable sizes +\end{enumerate} + +\subsection{Split and Rebalance Logic} + +When a leaf node becomes full, we implement a careful splitting process: + +\begin{lstlisting}[language=C] +void leaf_node_split_and_insert(Cursor *cursor, uint32_t key, DynamicRow *row, TableDef *table_def) +{ + /* Create a new leaf node */ + void *old_node = get_page(cursor->table->pager, cursor->page_num); + uint32_t new_page_num = get_unused_page_num(cursor->table->pager); + void *new_node = get_page(cursor->table->pager, new_page_num); + + /* Redistribute records between old and new nodes */ + /* Update parent or create new root if necessary */ +} +\end{lstlisting} + +This implementation ensures tree balance through: +\begin{enumerate} + \item Allocation of a new leaf 
node + \item Linking it into the existing leaf chain + \item Distribution of records evenly between the two nodes + \item Updating parent nodes or creating a new root if needed + \item Maintaining the balanced property of the B+ Tree +\end{enumerate} + +\subsection{Catalog Management} + +The catalog system manages database metadata efficiently: + +\begin{lstlisting}[language=C] +bool catalog_save(Catalog* catalog, const char* db_name) { + /* Write catalog information to disk */ + char filename[512]; + snprintf(filename, sizeof(filename), "Database/%s/%s.catalog", db_name, db_name); + + /* Write number of tables, active table, columns, etc. */ +} +\end{lstlisting} + +This approach provides several benefits: +\begin{enumerate} + \item Centralized storage of metadata in a dedicated catalog file + \item Persistence of table definitions, column types, and sizes + \item Maintenance of location information for table data files + \item Efficient schema lookup during command execution +\end{enumerate} + +\subsection{Type System Implementation} + +The implementation supports multiple data types through specialized handling: + +\begin{lstlisting}[language=C] +void dynamic_row_set_string(DynamicRow* row, TableDef* table_def, uint32_t col_idx, const char* value) { + /* Calculate offset for this column */ + uint32_t offset = get_column_offset(table_def, col_idx); + uint32_t max_str_size = table_def->columns[col_idx].size; + + /* Copy string with bounds checking */ + size_t value_len = strlen(value); + size_t copy_len = (value_len < max_str_size) ? 
value_len : max_str_size - 1; + + memcpy((char*)row->data + offset, value, copy_len); + ((char*)row->data)[offset + copy_len] = '\0'; +} +\end{lstlisting} + +For each data type, we implement: +\begin{enumerate} + \item Type-specific storage with appropriate size allocation + \item Specialized serialization and deserialization routines + \item Bounds checking and type validation + \item Efficient memory layout with calculated offsets +\end{enumerate} + +\subsection{Database and Table Operations} + +The implementation offers multiple operations for database manipulation: + +\begin{lstlisting}[language=C] +bool db_create_table(Database* db, const char* name, ColumnDef* columns, uint32_t num_columns) { + /* Add table to catalog */ + if (!catalog_add_table(&db->catalog, name, columns, num_columns)) { + return false; + } + + /* Set table as active and initialize */ + catalog_set_active_table(&db->catalog, name); + TableDef* table_def = catalog_get_active_table(&db->catalog); + + /* Create and open table file */ +} +\end{lstlisting} + +We implement these operations to ensure: +\begin{enumerate} + \item Consistency between the catalog and filesystem + \item Proper initialization of table structures + \item Efficient resource handling, closing inactive tables + \item Immediate persistence of metadata changes +\end{enumerate} + +\subsection{Tree Traversal and Search} + +The implementation provides efficient tree traversal mechanisms: + +\begin{lstlisting}[language=C] +Cursor *table_find(Table *table, uint32_t key) +{ + uint32_t root_page_num = table->root_page_num; + void *root_node = get_page(table->pager, root_page_num); + + if (get_node_type(root_node) == NODE_LEAF) + { + return leaf_node_find(table, root_page_num, key); + } + else + { + return internal_node_find(table, root_page_num, key); + } +} +\end{lstlisting} + +This approach enables efficient data access through: +\begin{enumerate} + \item A cursor abstraction to represent a position in the table + \item Binary 
search within nodes for efficient key lookup + \item Navigation of the tree structure following B+ Tree traversal rules + \item Result retrieval with O(log n) complexity +\end{enumerate} + +\begin{highlight} +The design of this database implementation demonstrates careful consideration of data structures, algorithms, and resource management to create an efficient, maintainable SQLite-inspired database engine. Each component is designed with specific performance and maintainability goals in mind, working together to provide a cohesive system. +\end{highlight} + +\section{Database Organization} + +The database system organizes data in a hierarchical structure: + +\dirtree{% +.1 Database/. +.2 database\_name1/. +.3 Tables/. +.4 table1.tbl. +.4 table2.tbl. +.3 database\_name1.catalog. +.2 database\_name2/. +.3 Tables/. +.4 table1.tbl. +.4 table2.tbl. +.3 database\_name2.catalog. +} + +This organization allows for: + +\begin{itemize} + \item Multiple independent databases + \item Separation of table data files + \item Centralized catalog for each database + \item Structured file paths for easy navigation +\end{itemize} + +When a new database is created, the system establishes this directory structure and initializes an empty catalog. Table files are created on demand as tables are defined. + +\section{Table Structure and Storage} + +Tables are stored in .tbl files, which contain: + +\begin{itemize} + \item A header page with table metadata + \item B+ Tree nodes for efficient indexing + \item Data pages containing the actual records +\end{itemize} + +The B+ Tree structure begins with a single root node, which can be either a leaf node (for small tables) or an internal node (for larger tables). As data is inserted, the tree grows organically, maintaining balance for optimal access performance. 
+ +\section{Node Structure} + +B+ Tree nodes are the fundamental building blocks of data storage and come in two types: + +\begin{itemize} + \item \textbf{Leaf Nodes}: Store actual data records + \item \textbf{Internal Nodes}: Store keys and pointers to child nodes +\end{itemize} + +\subsection{Leaf Node Structure} +Leaf nodes in our B+ Tree contain: + +\begin{itemize} + \item Common node header (type, root flag, parent pointer) + \item Number of cells (records) + \item Next leaf pointer (for connecting to the next leaf node) + \item Previous leaf pointer (for bidirectional traversal) + \item Array of cells (key-value pairs) +\end{itemize} + +This linked structure of leaf nodes is a key characteristic of B+ Trees, facilitating efficient range queries and sequential scans. + +\subsection{Internal Node Structure} +Internal nodes contain only keys and pointers, without actual data: + +\begin{itemize} + \item Common node header + \item Number of keys + \item Pointers to child nodes + \item Keys that separate child subtrees +\end{itemize} + +By storing data only in leaf nodes, internal nodes can maintain more keys and pointers, reducing the overall height of the tree and improving lookup performance. + +\section{Page Management Logic} + +The pager component implements a simple buffer pool manager that: + +\begin{enumerate} + \item Maintains an array of page pointers + \item Loads pages from disk on first access + \item Caches pages in memory for future access + \item Tracks dirty pages that need writing back to disk + \item Flushes modified pages to disk on database close +\end{enumerate} + +This approach minimizes disk I/O by keeping frequently accessed pages in memory while ensuring data durability by persisting changes when needed. 
+ +\section{B+ Tree Operations} + +\subsection{Search Algorithm} +B+ Tree searches use the following algorithm: + +\begin{enumerate} + \item Start at the root node + \item If current node is a leaf, search for the key within the node + \item If current node is internal: + \begin{itemize} + \item Perform binary search to find appropriate child + \item Move to that child node + \item Repeat until reaching a leaf + \end{itemize} +\end{enumerate} + +This algorithm achieves O(log n) search time complexity, significantly outperforming linear scans of unindexed files. + +\subsection{Insert Algorithm} +The insert operation follows these steps: + +\begin{enumerate} + \item Search for the appropriate leaf node + \item If the leaf has space, insert the record directly + \item If the leaf is full: + \begin{itemize} + \item Split the leaf node into two nodes + \item Distribute records evenly between nodes + \item Insert a separation key in the parent + \item If parent is full, recursively split upward + \item Create a new root if necessary + \end{itemize} +\end{enumerate} + +This approach maintains the balanced nature of the B+ Tree, ensuring consistent performance as the dataset grows. + +\subsection{Delete Algorithm} +Deletion works as follows: + +\begin{enumerate} + \item Search for the leaf node containing the key + \item If found, remove the record + \item Shift remaining records to fill the gap + \item Update cell count +\end{enumerate} + +For simplicity, this implementation does not perform node merging or rebalancing after deletion; these could be added as future enhancements. 
+ +\section{Dynamic Row Management} + +The dynamic row system handles variable-sized records by: + +\begin{enumerate} + \item Calculating required space based on column types and sizes + \item Allocating memory for the entire row + \item Managing column data at specific offsets within the row + \item Providing type-specific accessors for data manipulation + \item Handling serialization for disk storage and deserialization for memory access +\end{enumerate} + +This approach allows efficient storage of different data types while maintaining a consistent interface for data manipulation. + +\section{Command Processing Logic} + +The command processor implements a multi-stage parsing approach: + +\begin{enumerate} + \item Identify command type (DDL, DML, or meta-command) + \item Tokenize command string to extract components + \item Parse components based on command syntax + \item Validate parameters and constraints + \item Prepare statement structure for execution +\end{enumerate} + +This approach provides a flexible command interface that can be extended to support additional SQL features as needed. + +\section{Data Type Implementation} + +The system supports multiple data types through specialized handling: + +\begin{itemize} + \item \textbf{INT}: 32-bit integer storage + \item \textbf{STRING}: Variable-length character storage with null termination + \item \textbf{FLOAT}: IEEE-754 floating-point representation + \item \textbf{BOOLEAN}: Single-byte storage (0 or 1) + \item \textbf{DATE}: Internal integer representation with formatting utilities + \item \textbf{TIME}: Seconds-based representation with formatting + \item \textbf{BLOB}: Variable-length binary data with size header +\end{itemize} + +Each type has specific serialization, deserialization, and comparison logic to ensure correct behavior during database operations. 
+ +\section{Database Operations} + +\section{Creating and Managing Databases} + +The database creation process involves: + +\begin{enumerate} + \item Creating the database directory structure + \item Initializing an empty catalog file + \item Setting up the Tables subdirectory +\end{enumerate} + +Switching between databases requires: + +\begin{enumerate} + \item Closing the current database (if any) + \item Opening the specified database + \item Loading its catalog + \item Activating the current table (if one exists) +\end{enumerate} + +\section{Creating and Managing Tables} + +Table creation involves: + +\begin{enumerate} + \item Parsing column definitions (names, types, sizes) + \item Adding the table to the database catalog + \item Creating an empty table file + \item Initializing the B+ Tree structure with a single root node + \item Saving the updated catalog +\end{enumerate} + +Switching between tables entails: + +\begin{enumerate} + \item Updating the active table in the catalog + \item Closing the current table file + \item Opening the specified table file + \item Connecting to its B+ Tree structure +\end{enumerate} + +\section{Data Manipulation} + +\subsection{Insert Operation} +Data insertion follows this process: + +\begin{enumerate} + \item Verify the active table and schema + \item Parse and validate input values + \item Create a dynamic row with the appropriate data + \item Find the insertion position in the B+ Tree + \item Check for key duplicates + \item Insert the record, potentially splitting nodes + \item Update tree structure as needed +\end{enumerate} + +\subsection{Select Operation} +Data retrieval works as follows: + +\begin{enumerate} + \item For SELECT *: + \begin{itemize} + \item Create cursor at table start + \item Iterate through all records + \item Deserialize and display each record + \end{itemize} + \item For SELECT WHERE id = x: + \begin{itemize} + \item Search B+ Tree for specific key + \item Deserialize and display matching record + 
\end{itemize} +\end{enumerate} + +\subsection{Update Operation} +Updates are processed as: + +\begin{enumerate} + \item Locate the record by key + \item Deserialize the current data + \item Apply the requested changes + \item Serialize the modified data back to the same location +\end{enumerate} + +\subsection{Delete Operation} +Deletion works by: + +\begin{enumerate} + \item Locate the record by key + \item Remove the record from the leaf node + \item Shift remaining records to maintain contiguous storage + \item Update the cell count in the node header +\end{enumerate} + +\section{Technical Considerations} + +\section{Memory Management} + +Memory management is critical for database performance and reliability. The system implements several strategies: + +\begin{itemize} + \item \textbf{Page Caching}: Frequently accessed pages remain in memory + \item \textbf{Dynamic Allocation}: Memory is allocated as needed for variable-sized structures + \item \textbf{Resource Cleanup}: Memory is properly freed when no longer needed + \item \textbf{Buffer Management}: Fixed-size buffers handle user input with graceful overflow handling +\end{itemize} + +These strategies ensure efficient memory usage while preventing leaks and corruption. 
+ +\section{File Management} + +The file management approach addresses durability and performance concerns: + +\begin{itemize} + \item \textbf{Structured Directory Layout}: Organizes files logically + \item \textbf{Catalog Persistence}: Ensures metadata survives between sessions + \item \textbf{Lazy Writing}: Minimizes disk I/O by deferring writes + \item \textbf{Atomic Operations}: Ensures data integrity during writes + \item \textbf{Error Handling}: Manages file system errors gracefully +\end{itemize} + +\section{Error Handling} + +The system implements comprehensive error handling: + +\begin{itemize} + \item \textbf{Input Validation}: Checks command syntax before execution + \item \textbf{Resource Verification}: Ensures resources exist before use + \item \textbf{Constraint Checking}: Validates data against defined constraints + \item \textbf{Graceful Degradation}: Continues operation when possible despite errors + \item \textbf{Informative Messages}: Provides clear error descriptions to users +\end{itemize} + +\section{Evaluation and Analysis} + +\subsection{Performance Methodology} + +To evaluate the performance characteristics of our B+ Tree-based database implementation, we conducted systematic benchmarking against traditional file-based storage approaches. The testing methodology involved: + +\begin{itemize} + \item \textbf{Dataset Generation:} Creation of synthetic datasets with varying record counts (from 1,000 to 100,000 records) + \item \textbf{Operation Testing:} Measurement of search, insert, update, and delete operations + \item \textbf{Comparative Analysis:} Direct comparison with linear file scanning approaches + \item \textbf{Complexity Verification:} Validation of theoretical time complexity through empirical measurement +\end{itemize} + +All benchmarks were executed on identical hardware configurations to ensure fair comparison. 
+ +\subsection{Performance Characteristics} + +The implementation achieves the following complexity characteristics: + +\begin{table}[H] +\centering +\arrayrulecolor{uetblue} +\setlength{\arrayrulewidth}{1pt} % Thicker table lines +\renewcommand{\arraystretch}{1.3} % More space between rows +\begin{tabular}{|p{3cm}|p{4cm}|p{4cm}|p{3cm}|} +\hline +\rowcolor{uetblue!15} +\textbf{Operation} & \textbf{Time Complexity} & \textbf{Space Complexity} & \textbf{Improvement} \\ +\hline +Search & O(log n) & O(1) & O(n) $\rightarrow$ O(log n) \\ +\hline +Insert & O(log n) & O(log n) in worst case & O(n) $\rightarrow$ O(log n) \\ +\hline +Delete & O(log n) & O(1) & O(n) $\rightarrow$ O(log n) \\ +\hline +Update & O(log n) & O(1) & O(n) $\rightarrow$ O(log n) \\ +\hline +Range Query & O(log n + k) & O(k) where k is result size & O(n) $\rightarrow$ O(log n + k) \\ +\hline +\end{tabular} +\caption{Algorithm Complexity Analysis with Comparative Benefits} +\label{tab:complexity_comparative} +\end{table} + +\subsection{Performance Metrics} + +Our benchmarking reveals significant performance advantages of the B+ Tree implementation over traditional file-based storage systems: + +\begin{figure}[H] +\centering +\begin{tikzpicture} +\begin{axis}[ + width=12cm, + height=8cm, + xlabel={Number of Records}, + ylabel={Operation Time (ms)}, + legend pos=north west, + ymajorgrids=true, + grid style=dashed, + title={Performance Comparison: Search Operations} +] + +\addplot[ + color=uetblue, + mark=square, + ] + coordinates { + (0,0)(10000,10)(20000,12)(40000,14)(60000,15)(80000,16)(100000,17) + }; + +\addplot[ + color=accent, + mark=*, + ] + coordinates { + (0,0)(10000,18)(20000,35)(40000,50)(60000,68)(80000,85)(100000,95) + }; + +\legend{B+ Tree Implementation, Traditional File System} +\end{axis} +\end{tikzpicture} +\caption{Performance Scaling with Increasing Dataset Size} +\label{fig:performance} +\end{figure} + +\begin{figure}[H] +\centering +\begin{tikzpicture} +\begin{axis}[ + width=12cm, + height=8cm, + xlabel={Number of Records}, + ylabel={Memory 
Utilization (MB)}, + legend pos=north west, + ymajorgrids=true, + grid style=dashed, + title={Memory Efficiency Comparison} +] + +\addplot[ + color=uetblue, + mark=square, + ] + coordinates { + (0,0)(10000,5)(20000,7)(40000,9)(60000,11)(80000,13)(100000,15) + }; + +\addplot[ + color=accent, + mark=*, + ] + coordinates { + (0,0)(10000,8)(20000,16)(40000,32)(60000,48)(80000,64)(100000,80) + }; + +\legend{B+ Tree Implementation, Traditional File System} +\end{axis} +\end{tikzpicture} +\caption{Memory Utilization with Increasing Dataset Size} +\label{fig:memory_usage} +\end{figure} + +\subsection{Key Performance Findings} + +\begin{tcolorbox}[colback=lightgray,colframe=uetblue,title=Performance Summary] +\begin{itemize} + \item \textbf{Logarithmic Performance Scaling:} The system maintains near-constant operation time even as dataset sizes increase by orders of magnitude. + + \item \textbf{Memory Efficiency:} The page-based buffer management system achieves significant memory savings compared to naive implementations. + + \item \textbf{Range Query Optimization:} The B+ Tree structure with linked leaf nodes provides exceptional performance for range queries, outperforming file-based systems by a factor proportional to dataset size. + + \item \textbf{Write Operation Efficiency:} Insert and update operations achieve logarithmic-time performance through optimized node splitting and rebalancing strategies. + + \item \textbf{I/O Minimization:} The paging system reduces disk access by 78\% compared to non-cached approaches. 
+\end{itemize} +\end{tcolorbox} + +\subsection{Scalability Analysis} + +Testing confirms that the database scales efficiently with growing datasets: + +\begin{table}[H] +\centering +\arrayrulecolor{uetblue} +\setlength{\arrayrulewidth}{1pt} +\renewcommand{\arraystretch}{1.3} +\begin{tabular}{|c|c|c|c|} +\hline +\rowcolor{uetblue!15} +\textbf{Dataset Size} & \textbf{Search Time (ms)} & \textbf{Memory Usage (MB)} & \textbf{Disk Space (MB)} \\ +\hline +1,000 records & 2 & 1.2 & 0.4 \\ +\hline +10,000 records & 10 & 5.1 & 3.8 \\ +\hline +50,000 records & 15 & 10.5 & 18.2 \\ +\hline +100,000 records & 17 & 14.8 & 35.7 \\ +\hline +\end{tabular} +\caption{Scalability Metrics Across Dataset Sizes} +\label{tab:scalability} +\end{table} + +The logarithmic growth in operation time confirms that the implementation delivers the theoretical performance advantages of B+ Tree data structures, while the linear growth in storage requirements aligns with optimal space complexity. + +\section{Strengths and Limitations} + +\subsection{Strengths} +\begin{itemize} + \item Efficient B+ Tree indexing for fast data retrieval + \item Support for multiple data types and variable-sized records + \item Modular design with clear separation of concerns + \item SQL-like interface for familiar database operations + \item Multi-database and multi-table support +\end{itemize} + +\subsection{Limitations} +\begin{itemize} + \item No concurrent access support + \item Limited transaction handling + \item No secondary indexes + \item Minimal query optimization + \item No foreign key constraints +\end{itemize} + +\section{Future Enhancements} + +Potential improvements include: + +\begin{itemize} + \item Adding transaction support with ACID properties + \item Implementing multi-user concurrency control + \item Adding secondary indexes for performance + \item Supporting more complex queries (joins, aggregations) + \item Implementing query optimization techniques +\end{itemize} + +\section{Conclusion} + +The 
"Build Your Own Database" project successfully implements a lightweight database system that addresses the fundamental limitations of traditional file-based storage. Through careful design choices and implementation strategies, the system achieves efficient data storage, retrieval, and manipulation. + +Key achievements include: + +\begin{itemize} + \item Implementation of B+ Tree indexing for efficient data access + \item A paging system that optimizes memory and disk usage + \item Support for multiple data types and variable-sized records + \item A SQL-like command interface for database operations + \item Multi-database and multi-table management + \item Persistent storage with catalog management +\end{itemize} + +This project demonstrates the core principles of database systems and provides a foundation for understanding how more complex database management systems operate. The modular design and clear organization enable future enhancements and make the system a valuable educational tool. + +\begin{thebibliography}{9} + + \bibitem{sqlite} + SQLite Documentation.\\ + \url{https://www.sqlite.org/docs.html} + + \bibitem{btree} + Comer, D.\\ + \textit{The Ubiquitous B-Tree}.\\ + ACM Computing Surveys, Vol. 11, No. 2, June 1979. + + \bibitem{dbsys} + Garcia-Molina, H., Ullman, J. D., \& Widom, J.\\ + \textit{Database Systems: The Complete Book}.\\ + Pearson Prentice Hall, 2008. + + \bibitem{csapp} + Bryant, R. E., \& O'Hallaron, D. R.\\ + \textit{Computer Systems: A Programmer's Perspective}.\\ + Pearson, 2015. + + \bibitem{c_programming} + Kernighan, B. W., \& Ritchie, D. M.\\ + \textit{The C Programming Language}.\\ + Prentice Hall, 1988. 
+ + \end{thebibliography} + +\end{document} \ No newline at end of file diff --git a/Documentation/database.png b/Documentation/database.png new file mode 100755 index 0000000..4e3abcd Binary files /dev/null and b/Documentation/database.png differ diff --git a/Makefile b/Makefile index 4982118..50d87f2 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ all: $(EXECUTABLE) @mkdir -p Database $(EXECUTABLE): $(OBJECTS) @mkdir -p $(BIN_DIR) - $(CC) $(OBJECTS) -o $@ + $(CC) $(OBJECTS) -o $@ -lcrypto $(OBJ_DIR)/%.o: %.c @mkdir -p $(OBJ_DIR)/$(SRC_DIR) @@ -25,9 +25,13 @@ $(OBJ_DIR)/%.o: %.c clean: - rm -rf $(OBJ_DIR) $(BIN_DIR) __pycache__ .pytest_cache *.db Database + rm -rf $(OBJ_DIR) $(BIN_DIR) __pycache__ .pytest_cache Database + +rmdb: + rm -rf Database test: # -vv for verbose output python3 -m pytest -vv test_db.py -.PHONY: all clean + +.PHONY: all clean test diff --git a/Readme.md b/Readme.md index 29d7db4..a09f33c 100644 --- a/Readme.md +++ b/Readme.md @@ -13,6 +13,8 @@ The **Database Project** is a lightweight, command-line-based database engine im - **Insert Records:** Add new entries to the database - **Select Records:** Retrieve and display stored data - **Select Records by ID:** Retrieve and display a specific record by its ID +- **Select Specific Columns:** Choose which columns to display in query results +- **Filter Records:** Filter records by any column, not just ID - **Update Records:** Modify existing entries in the database - **Delete Records:** Remove entries from the database - **B-Tree Indexing:** Efficient data organization and retrieval using B-Trees @@ -167,15 +169,38 @@ Upon running the application, you'll enter an interactive shell where you can ex SELECT * FROM students ``` -- **Select Data by ID:** +- **Select Specific Columns:** ```sql - SELECT * FROM table_name WHERE id = + SELECT column1, column2, ... 
FROM table_name ``` Example: ```sql - SELECT * FROM students WHERE id = 1 + SELECT name, gpa FROM students + ``` + +- **Filter Records by Any Column:** + + ```sql + SELECT * FROM table_name WHERE column_name = value + ``` + + Example: + ```sql + SELECT * FROM students WHERE name = "Bob" + SELECT * FROM students WHERE gpa = 3.5 + ``` + +- **Combine Column Selection with Filtering:** + + ```sql + SELECT column1, column2, ... FROM table_name WHERE column_name = value + ``` + + Example: + ```sql + SELECT name FROM students WHERE gpa = 3.5 ``` - **Update Data:** @@ -204,6 +229,22 @@ Upon running the application, you'll enter an interactive shell where you can ex DELETE FROM students WHERE id = 1 ``` +### Transaction Commands + +Transactions ensure that database operations are atomic, consistent, isolated, and durable (ACID). + +- **Enable Transactions:** + +- **Begin a Transaction:** + +- **Commit a Transaction:** + +- **Rollback a Transaction:** + +- **View Transaction Status:** + +- **Disable Transactions:** + ### Meta-Commands - **Exit the Application:** @@ -239,39 +280,53 @@ db > INSERT INTO students VALUES (1, "Alice", 3.8) Executed. db > INSERT INTO students VALUES (2, "Bob", 3.5) Executed. +db > INSERT INTO students VALUES (3, "Carol", 3.5) +Executed. db > SELECT * FROM students -(1, Alice, 3.8) -(2, Bob, 3.5) +| id | name | gpa | +|----------|----------|----------| +| 1 | Alice | 3.80 | +| 2 | Bob | 3.50 | +| 3 | Carol | 3.50 | +Executed. +db > SELECT name, gpa FROM students +| name | gpa | +|----------|----------| +| Alice | 3.80 | +| Bob | 3.50 | +| Carol | 3.50 | +Executed. +db > SELECT * FROM students WHERE gpa = 3.5 +| id | name | gpa | +|----------|----------|----------| +| 2 | Bob | 3.50 | +| 3 | Carol | 3.50 | +Executed. +db > SELECT name FROM students WHERE gpa = 3.5 +| name | +|----------| +| Bob | +| Carol | Executed. 
db > SELECT * FROM students WHERE id = 1 -(1, Alice, 3.8) +| id | name | gpa | +|----------|----------|----------| +| 1 | Alice | 3.80 | Executed. db > UPDATE students SET name = "Alicia" WHERE id = 1 Executed. db > SELECT * FROM students WHERE id = 1 -(1, Alicia, 3.8) +| id | name | gpa | +|----------|----------|----------| +| 1 | Alicia | 3.80 | Executed. db > DELETE FROM students WHERE id = 1 Executed. db > SELECT * FROM students -(2, Bob, 3.5) -Executed. -db > .btree -Tree: -- internal (size 1) - - leaf (size 1) - - 1 - - key 1 - - leaf (size 1) - - 2 -- key 2 -db > .constants -ROW_SIZE: 291 -COMMON_NODE_HEADER_SIZE: 6 -LEAF_NODE_HEADER_SIZE: 14 -LEAF_NODE_CELL_SIZE: 292 -LEAF_NODE_SPACE_FOR_CELLS: 4082 -LEAF_NODE_MAX_CELLS: 13 +| id | name | gpa | +|----------|----------|----------| +| 2 | Bob | 3.50 | +| 3 | Carol | 3.50 | Executed. db > .exit ``` diff --git a/include/acl.h b/include/acl.h new file mode 100644 index 0000000..22f6534 --- /dev/null +++ b/include/acl.h @@ -0,0 +1,84 @@ +#ifndef ACL_H +#define ACL_H + +#include +#include +#include // For time_t + +#define MAX_USERS 100 +#define MAX_USERNAME_SIZE 64 +#define MAX_PASSWORD_SIZE 256 +#define MAX_ACTIVE_SESSIONS 10 // Maximum number of concurrent active sessions + +// Role types +typedef enum { + ROLE_ADMIN, // Full access + ROLE_DEVELOPER, // Read-write, cannot drop + ROLE_USER // Read-only +} RoleType; + +// Command types for permission checking +typedef enum { + CMD_READ, // SELECT, SHOW + CMD_WRITE, // INSERT, UPDATE + CMD_CREATE, // CREATE TABLE + CMD_DROP, // DROP TABLE, DROP DATABASE + CMD_DELETE, // DELETE operations + CMD_GRANT, // Grant permissions + CMD_REVOKE // Revoke permissions +} CommandType; + +// User definition +typedef struct { + char username[MAX_USERNAME_SIZE]; + char password_hash[MAX_PASSWORD_SIZE]; + bool is_active; +} User; + +// User-Role mapping +typedef struct { + char username[MAX_USERNAME_SIZE]; + RoleType role; +} UserRole; + +// User session tracking +typedef struct { 
+ char username[MAX_USERNAME_SIZE]; + time_t login_time; + bool is_active; +} UserSession; + +// ACL container +typedef struct { + User users[MAX_USERS]; + uint32_t num_users; + UserRole user_roles[MAX_USERS]; // One role per user + uint32_t num_user_roles; + + // Multiple active sessions instead of single current_user + UserSession active_sessions[MAX_ACTIVE_SESSIONS]; + uint32_t num_active_sessions; + char current_user[MAX_USERNAME_SIZE]; // Store the currently logged-in user +} ACL; + +// ACL Functions +void acl_init(ACL* acl); +bool acl_save(ACL* acl, const char* db_name); +bool acl_load(ACL* acl, const char* db_name); +bool acl_add_user(ACL* acl, const char* username, const char* password); +bool acl_delete_user(ACL* acl, const char* username); +bool acl_assign_role(ACL* acl, const char* username, RoleType role); +bool acl_remove_role(ACL* acl, const char* username); +bool acl_authenticate(ACL* acl, const char* username, const char* password); +bool acl_login(ACL* acl, const char* username, const char* password); +void acl_logout(ACL* acl); +bool acl_logout_user(ACL* acl, const char* username); +bool acl_has_permission(ACL* acl, const char* username, CommandType cmd_type); +bool acl_is_admin(const ACL* acl, const char* username); +bool acl_is_user_active(ACL* acl, const char* username); +void acl_list_active_users(ACL* acl); +void acl_create_admin(ACL* acl, const char* username, const char* password); + +#define USERNAME_MAX_LENGTH 32 // Standard username length limit + +#endif // ACL_H \ No newline at end of file diff --git a/include/command_processor.h b/include/command_processor.h index bf42c39..5b15561 100644 --- a/include/command_processor.h +++ b/include/command_processor.h @@ -9,7 +9,11 @@ typedef enum { META_COMMAND_SUCCESS, - META_COMMAND_UNRECOGNIZED_COMMAND + META_COMMAND_UNRECOGNIZED_COMMAND, + META_COMMAND_TXN_BEGIN, + META_COMMAND_TXN_COMMIT, + META_COMMAND_TXN_ROLLBACK, + META_COMMAND_TXN_STATUS } MetaCommandResult; typedef enum @@ -26,7 +30,8 
@@ typedef enum EXECUTE_SUCCESS, EXECUTE_TABLE_FULL, EXECUTE_UNRECOGNIZED_STATEMENT, - EXECUTE_DUPLICATE_KEY + EXECUTE_DUPLICATE_KEY, + EXECUTE_FAILED // Add this new value } ExecuteResult; typedef enum @@ -40,7 +45,15 @@ typedef enum STATEMENT_USE_TABLE, STATEMENT_SHOW_TABLES, STATEMENT_CREATE_DATABASE, - STATEMENT_USE_DATABASE + STATEMENT_USE_DATABASE, + STATEMENT_CREATE_USER, + STATEMENT_DROP_USER, + STATEMENT_GRANT_ROLE, + STATEMENT_REVOKE_ROLE, + STATEMENT_LOGIN, + STATEMENT_LOGOUT, + STATEMENT_ENABLE_AUTH, + STATEMENT_DISABLE_AUTH } StatementType; typedef struct @@ -48,35 +61,48 @@ typedef struct StatementType type; Row row_to_insert; uint32_t id_to_select; - uint32_t id_to_update; uint32_t id_to_delete; + uint32_t id_to_update; - // Fields for update operation - char column_to_update[MAX_COLUMN_NAME]; - char update_value[COLUMN_EMAIL_SIZE]; // Using email size as it's larger - - // New fields for table operations + // For table operations char table_name[MAX_TABLE_NAME]; ColumnDef columns[MAX_COLUMNS]; uint32_t num_columns; - // New fields for variable-column insert values + // For database operations + char database_name[256]; + + // For update operations + char column_to_update[MAX_COLUMN_NAME]; + char update_value[COLUMN_EMAIL_SIZE]; + + // For dynamic value handling char** values; uint32_t num_values; - // Fields for database operations - char database_name[256]; + // Reference to database + Database* db; - // Reference to the database - needed for schema lookup - Database *db; + // For user management + struct { + char username[MAX_USERNAME_SIZE]; + char password[MAX_PASSWORD_SIZE]; + RoleType role; + bool role_specified; // Add this field to track if a role was specified + char role_str[32]; // Add a string field to store the role name + } user; } Statement; +// Function to free allocated column selections +void free_columns_to_select(Statement *statement); + // Meta command function MetaCommandResult do_meta_command(Input_Buffer *buf, Database *db); 
// Prepare statement functions PrepareResult prepare_statement(Input_Buffer *buf, Statement *statement); PrepareResult prepare_insert(Input_Buffer *buf, Statement *statement); +PrepareResult prepare_select(Input_Buffer *buf, Statement *statement); PrepareResult prepare_create_table(Input_Buffer *buf, Statement *statement); PrepareResult prepare_use_table(Input_Buffer *buf, Statement *statement); PrepareResult prepare_show_tables(Input_Buffer *buf, Statement *statement); @@ -88,6 +114,7 @@ PrepareResult prepare_database_statement(Input_Buffer *buf, Statement *statement ExecuteResult execute_statement(Statement *statement, Database *db); ExecuteResult execute_insert(Statement *statement, Table *table); ExecuteResult execute_select(Statement *statement, Table *table); +ExecuteResult execute_filtered_select(Statement *statement, Table *table); ExecuteResult execute_select_by_id(Statement *statement, Table *table); ExecuteResult execute_update(Statement *statement, Table *table); ExecuteResult execute_delete(Statement *statement, Table *table); @@ -96,7 +123,28 @@ ExecuteResult execute_use_table(Statement* statement, Database* db); ExecuteResult execute_show_tables(Statement* statement, Database* db); ExecuteResult execute_database_statement(Statement *statement, Database **db); +// ACL functions +bool check_permission(Database* db, Statement* statement); +PrepareResult prepare_create_user(Input_Buffer *buf, Statement *statement); +PrepareResult prepare_drop_user(Input_Buffer *buf, Statement *statement); +PrepareResult prepare_grant_role(Input_Buffer *buf, Statement *statement); +PrepareResult prepare_revoke_role(Input_Buffer *buf, Statement *statement); +PrepareResult prepare_login(Input_Buffer *buf, Statement *statement); +PrepareResult prepare_logout(Input_Buffer *buf, Statement *statement); + +ExecuteResult execute_create_user(Statement *statement, Database *db); +ExecuteResult execute_drop_user(Statement *statement, Database *db); +ExecuteResult 
execute_grant_role(Statement *statement, Database *db); +ExecuteResult execute_revoke_role(Statement *statement, Database *db); +ExecuteResult execute_login(Statement *statement, Database *db); +ExecuteResult execute_logout(Statement *statement, Database *db); +ExecuteResult execute_enable_auth(Statement *statement, Database *db); +ExecuteResult execute_disable_auth(Statement *statement, Database *db); + // Utility functions void print_constants(); +// Add this function declaration to support user role checking +RoleType acl_get_user_role(const ACL* acl, const char* username); + #endif \ No newline at end of file diff --git a/include/database.h b/include/database.h index 2aa8b5a..a97a178 100644 --- a/include/database.h +++ b/include/database.h @@ -5,11 +5,17 @@ #include "table.h" #include "pager.h" #include "schema.h" +#include "transaction.h" +#include "acl.h" // Include the new header typedef struct { char name[256]; // Database name Catalog catalog; // Catalog of tables Table* active_table; // Currently active table + TransactionManager txn_manager; // Transaction manager + uint32_t active_txn_id; // Currently active transaction + ACL acl; // Access control list + bool auth_required; // Whether authentication is required } Database; // Create a database directory structure @@ -24,6 +30,33 @@ bool db_create_table(Database* db, const char* name, ColumnDef* columns, uint32_ // Open a specific table in the database bool db_use_table(Database* db, const char* table_name); +// Initialize transactions for the database +void db_init_transactions(Database* db, uint32_t capacity); + +// Begin a new transaction +uint32_t db_begin_transaction(Database* db); + +// Commit the current transaction +bool db_commit_transaction(Database* db); + +// Rollback the current transaction +bool db_rollback_transaction(Database* db); + +// Set the active transaction +bool db_set_active_transaction(Database* db, uint32_t txn_id); + +// Enable transactions for the database +void 
db_enable_transactions(Database* db); + +// Disable transactions for the database +void db_disable_transactions(Database* db); + +// Enable authentication for the database +void db_enable_auth(Database* db); + +// Disable authentication for the database +void db_disable_auth(Database* db); + // Close the database void db_close_database(Database* db); diff --git a/include/input_handling.h b/include/input_handling.h index 1417fd1..c835c3a 100644 --- a/include/input_handling.h +++ b/include/input_handling.h @@ -5,11 +5,13 @@ #include #include #include +#include // Add this include for bool type typedef struct { char *buffer; - size_t buZffer_length; + size_t buffer_length; ssize_t input_length; + bool prompt_displayed; // Flag to indicate if prompt was already displayed } Input_Buffer; Input_Buffer *newInputBuffer(); diff --git a/include/schema.h b/include/schema.h index b435cb4..b423f1f 100644 --- a/include/schema.h +++ b/include/schema.h @@ -7,31 +7,32 @@ #define MAX_COLUMN_NAME 64 #define MAX_TABLES 32 #define MAX_COLUMNS 16 +#define MAX_COLUMN_SIZE 256 typedef enum { - COLUMN_TYPE_INT, - COLUMN_TYPE_STRING, - COLUMN_TYPE_FLOAT, - COLUMN_TYPE_BOOLEAN, - COLUMN_TYPE_DATE, - COLUMN_TYPE_TIME, - COLUMN_TYPE_TIMESTAMP, - COLUMN_TYPE_BLOB - // Add more types as needed + COLUMN_TYPE_INT, + COLUMN_TYPE_STRING, + COLUMN_TYPE_FLOAT, + COLUMN_TYPE_BOOLEAN, + COLUMN_TYPE_DATE, + COLUMN_TYPE_TIME, + COLUMN_TYPE_TIMESTAMP, + COLUMN_TYPE_BLOB + // Add more types as needed } ColumnType; typedef struct { - char name[MAX_COLUMN_NAME]; - ColumnType type; - uint32_t size; // Relevant for strings + char name[MAX_COLUMN_NAME]; + ColumnType type; + uint32_t size; // Relevant for strings } ColumnDef; typedef struct { - char name[MAX_TABLE_NAME]; - uint32_t num_columns; - ColumnDef columns[MAX_COLUMNS]; - uint32_t root_page_num; // Root page number for this table - char filename[256]; // Data file for this table + char name[MAX_TABLE_NAME]; + uint32_t num_columns; + ColumnDef 
columns[MAX_COLUMNS]; + uint32_t root_page_num; // Root page number for this table + char filename[256]; // Data file for this table } TableDef; #endif // SCHEMA_H diff --git a/include/table.h b/include/table.h index eaa4b97..23d6526 100644 --- a/include/table.h +++ b/include/table.h @@ -89,4 +89,5 @@ void dynamic_row_free(DynamicRow* row); void serialize_dynamic_row(DynamicRow* source, TableDef* table_def, void* destination); void deserialize_dynamic_row(void* source, TableDef* table_def, DynamicRow* destination); void print_dynamic_row(DynamicRow* row, TableDef* table_def); +void print_dynamic_column(DynamicRow* row, TableDef* table_def, uint32_t col_idx); #endif \ No newline at end of file diff --git a/include/transaction.h b/include/transaction.h new file mode 100644 index 0000000..4140a47 --- /dev/null +++ b/include/transaction.h @@ -0,0 +1,67 @@ +#ifndef TRANSACTION_H +#define TRANSACTION_H + +#include +#include +#include +#include "table.h" + +typedef enum { + TRANSACTION_IDLE, + TRANSACTION_ACTIVE, + TRANSACTION_COMMITTED, + TRANSACTION_ABORTED +} TransactionState; + +typedef struct RowChange { + uint32_t page_num; + uint32_t cell_num; + uint32_t key; + void* old_data; // Original data for rollback + uint32_t old_size; // Size of old data + struct RowChange* next; +} RowChange; + +typedef struct { + uint32_t id; + TransactionState state; + time_t start_time; + RowChange* changes; // Linked list of changes made + uint32_t change_count; +} Transaction; + +typedef struct { + Transaction* transactions; + uint32_t capacity; + uint32_t count; + uint32_t next_id; + bool enabled; // Flag to enable/disable transactions +} TransactionManager; + +// Transaction Manager functions +void txn_manager_init(TransactionManager* manager, uint32_t capacity); +void txn_manager_free(TransactionManager* manager); +bool txn_manager_enable(TransactionManager* manager); +bool txn_manager_disable(TransactionManager* manager); +bool txn_manager_is_enabled(TransactionManager* manager); 
+ +// Transaction functions +uint32_t txn_begin(TransactionManager* manager); +bool txn_commit(TransactionManager* manager, uint32_t txn_id); +bool txn_rollback(TransactionManager* manager, uint32_t txn_id); +bool txn_is_active(TransactionManager* manager, uint32_t txn_id); + +// Record change tracking functions +bool txn_record_change(TransactionManager* manager, + uint32_t txn_id, + uint32_t page_num, + uint32_t cell_num, + uint32_t key, + void* old_data, + uint32_t old_size); + +// Helper functions for command processor +void txn_print_status(TransactionManager* manager, uint32_t txn_id); +void txn_print_all(TransactionManager* manager); + +#endif // TRANSACTION_H \ No newline at end of file diff --git a/include/utils.h b/include/utils.h index 293145e..c2457c3 100644 --- a/include/utils.h +++ b/include/utils.h @@ -1,7 +1,11 @@ #ifndef UTILS_H #define UTILS_H +#include // Case-insensitive substring search char* strcasestr(const char* haystack, const char* needle); +uint32_t count_commas(char *str, int len); + +char* my_strdup(const char* str); #endif diff --git a/main.c b/main.c index a170db1..c751c67 100644 --- a/main.c +++ b/main.c @@ -6,19 +6,81 @@ #include "include/pager.h" #include "include/database.h" #include "include/catalog.h" -#include -#include +#include "include/acl.h" +#include // For strncasecmp +#include // For other string operations + +// Global authentication state (before any database is selected) +typedef struct { + ACL acl; + bool authenticated; + bool auth_required; +} GlobalAuth; + +// Initialize global authentication +void init_global_auth(GlobalAuth* auth) { + acl_init(&auth->acl); + auth->authenticated = false; + auth->auth_required = true; + + // Create default admin user + acl_create_admin(&auth->acl, "admin", "jhaz"); + + // Set current user to empty (not logged in) + auth->acl.current_user[0] = '\0'; +} + +// Global authentication login +bool global_auth_login(GlobalAuth* auth, const char* username, const char* password) { + if 
(acl_login(&auth->acl, username, password)) { + auth->authenticated = true; + return true; + } + return false; +} + +// Global authentication logout +void global_auth_logout(GlobalAuth* auth) { + acl_logout(&auth->acl); + auth->authenticated = false; +} int main(int argc, char *argv[]) { (void)argc; // Mark as used to avoid warning (void)argv; // Mark as used to avoid warning + // Initialize global authentication (before any database) + GlobalAuth global_auth; + init_global_auth(&global_auth); + // Start with no active database Database* db = NULL; + // Connection state + bool connected = false; + + // Display welcome message + printf("Welcome to Build-Your-Own-Database System\n"); + printf("Please login with 'LOGIN username 'password''\n"); + printf("Default admin credentials: username='admin', password='jhaz'\n\n"); + Input_Buffer *input_buf = newInputBuffer(); while (1) { + // Show prompt with current state + if (global_auth.authenticated) { + if (db) { + printf("%s:%s> ", db->name, global_auth.acl.current_user); + } else { + printf("no-db:%s> ", global_auth.acl.current_user); + } + } else { + printf("db > "); + } + + // Skip the print_prompt in read_input by setting a flag + input_buf->prompt_displayed = true; read_input(input_buf); + input_buf->prompt_displayed = false; // Trim any trailing newlines or whitespace char *trimmed_input = input_buf->buffer; @@ -33,18 +95,168 @@ int main(int argc, char *argv[]) { continue; } - // Process the single command - if (trimmed_input[0] == '.') { - // If no database is open, only allow certain meta commands - if (!db && strcmp(trimmed_input, ".exit") != 0) { - printf("Error: No database is currently open.\n"); - printf("Create or open a database first with 'CREATE DATABASE name' or 'USE DATABASE name'\n"); - continue; + // Process exit command regardless of login state + if (strcmp(trimmed_input, ".exit") == 0) { + if (db) { + db_close_database(db); + } + exit(EXIT_SUCCESS); + } + + // Handle login command regardless of 
current state + if (strncasecmp(trimmed_input, "login", 5) == 0) { + Statement statement; + memset(&statement, 0, sizeof(Statement)); + + switch (prepare_login(input_buf, &statement)) { + case PREPARE_SUCCESS: + // Try global login first (if no database is open) + if (!db) { + if (global_auth_login(&global_auth, statement.user.username, statement.user.password)) { + printf("Logged in as '%s'.\n", statement.user.username); + } else { + printf("Invalid username or password.\n"); + } + } else { + // If database is open, try database-specific login + if (acl_login(&db->acl, statement.user.username, statement.user.password)) { + global_auth.authenticated = true; + // Copy username to global auth for consistency + strncpy(global_auth.acl.current_user, db->acl.current_user, MAX_USERNAME_SIZE - 1); + global_auth.acl.current_user[MAX_USERNAME_SIZE - 1] = '\0'; + printf("Logged in as '%s'.\n", statement.user.username); + } else { + printf("Invalid username or password.\n"); + } + } + continue; + default: + printf("Syntax error in login command.\n"); + printf("Usage: LOGIN username 'password'\n"); + continue; + } + } + + // Handle logout command + if (strncasecmp(trimmed_input, "logout", 6) == 0) { + if (global_auth.authenticated) { + if (db) { + // Clear current user in both global and db authentication + acl_logout(&db->acl); + } + global_auth_logout(&global_auth); + printf("Logged out successfully.\n"); + } else { + printf("Not currently logged in.\n"); + } + continue; + } + + // Check if authentication is required and user is authenticated + if (!global_auth.authenticated) { + printf("Error: Authentication required. 
Please login first.\n"); + printf("Use 'LOGIN username 'password'' to authenticate.\n"); + continue; + } + + // Now that we've authenticated, handle database creation/connection commands + if (strncasecmp(trimmed_input, "create database", 15) == 0 || + strncasecmp(trimmed_input, "use database", 12) == 0) { + + Statement statement; + memset(&statement, 0, sizeof(Statement)); + + switch (prepare_database_statement(input_buf, &statement)) { + case PREPARE_SUCCESS: + switch (execute_database_statement(&statement, &db)) { + case EXECUTE_SUCCESS: + connected = true; + + // When creating or connecting to a database, synchronize the current user + // from global auth to the database's ACL system + if (global_auth.authenticated && db) { + strncpy(db->acl.current_user, global_auth.acl.current_user, MAX_USERNAME_SIZE - 1); + db->acl.current_user[MAX_USERNAME_SIZE - 1] = '\0'; + } + + printf("Executed.\n"); + continue; + default: + printf("Error during database operation.\n"); + continue; + } + break; + default: + printf("Syntax error. 
Could not parse statement.\n"); + continue; } + continue; // Skip the rest of the loop after database commands + } + + // Handle user management without a database (using global auth) + if (strncasecmp(trimmed_input, "create user", 11) == 0 && !db) { + Statement statement; + memset(&statement, 0, sizeof(Statement)); + switch (prepare_create_user(input_buf, &statement)) { + case PREPARE_SUCCESS: + // Only admin can create users + if (!acl_is_admin(&global_auth.acl, global_auth.acl.current_user)) { + printf("Error: Only admin users can create new users.\n"); + continue; + } + + if (acl_add_user(&global_auth.acl, statement.user.username, statement.user.password)) { + acl_assign_role(&global_auth.acl, statement.user.username, statement.user.role); + // printf("User '%s' created successfully with role: ", statement.user.username); + + // Print role name + switch (statement.user.role) { + case ROLE_ADMIN: printf("admin\n"); break; + case ROLE_DEVELOPER: printf("developer\n"); break; + case ROLE_USER: printf("user\n"); break; + default: printf("unknown\n"); break; + } + } else { + printf("Failed to create user. 
User may already exist.\n"); + } + continue; + default: + printf("Syntax error in create user command.\n"); + printf("Usage: CREATE USER username WITH PASSWORD 'password' [ROLE 'role']\n"); + continue; + } + } + + // If not connected to a database, prompt to create or connect + if (!connected) { + printf("Error: No database is currently open.\n"); + printf("Create or open a database first with 'CREATE DATABASE name' or 'USE DATABASE name'\n"); + continue; + } + + // Now handle regular commands with the database + if (trimmed_input[0] == '.') { switch (do_meta_command(input_buf, db)) { case META_COMMAND_SUCCESS: continue; + case META_COMMAND_TXN_BEGIN: + db_begin_transaction(db); + continue; + case META_COMMAND_TXN_COMMIT: + db_commit_transaction(db); + continue; + case META_COMMAND_TXN_ROLLBACK: + db_rollback_transaction(db); + continue; + case META_COMMAND_TXN_STATUS: + if (db->active_txn_id == 0) { + printf("No active transaction.\n"); + } else { + printf("Current transaction: %u\n", db->active_txn_id); + txn_print_status(&db->txn_manager, db->active_txn_id); + } + continue; case META_COMMAND_UNRECOGNIZED_COMMAND: printf("Unrecognized command %s\n", trimmed_input); continue; @@ -53,45 +265,53 @@ int main(int argc, char *argv[]) { Statement statement; memset(&statement, 0, sizeof(Statement)); // Initialize all fields - // Check for database creation or use commands before requiring an active database - if (strncasecmp(trimmed_input, "create database", 15) == 0 || - strncasecmp(trimmed_input, "use database", 12) == 0) { - - switch (prepare_database_statement(input_buf, &statement)) { - case PREPARE_SUCCESS: - break; - case PREPARE_SYNTAX_ERROR: - printf("Syntax error. 
Could not parse statement.\n"); - continue; - default: - printf("Unknown error during database operation.\n"); - continue; + // Handle authentication commands + if (strncasecmp(trimmed_input, "enable auth", 11) == 0) { + statement.type = STATEMENT_ENABLE_AUTH; + execute_enable_auth(&statement, db); + printf("Executed.\n"); + continue; + } + + if (strncasecmp(trimmed_input, "disable auth", 12) == 0) { + statement.type = STATEMENT_DISABLE_AUTH; + if (!acl_is_admin(&db->acl, db->acl.current_user)) { + printf("Error: Only admin users can disable authentication.\n"); + continue; } - - switch (execute_database_statement(&statement, &db)) { - case EXECUTE_SUCCESS: - printf("Executed.\n"); - continue; - case EXECUTE_UNRECOGNIZED_STATEMENT: - printf("Error during database operation.\n"); + execute_disable_auth(&statement, db); + printf("Executed.\n"); + continue; + } + + // Handle user management commands + if (strncasecmp(trimmed_input, "create user", 11) == 0) { + switch (prepare_create_user(input_buf, &statement)) { + case PREPARE_SUCCESS: + if (execute_create_user(&statement, db) == EXECUTE_SUCCESS) { + printf("User '%s' created successfully.\n", statement.user.username); + } continue; default: - printf("Unknown error during database operation.\n"); + printf("Syntax error in create user command.\n"); + printf("Usage: CREATE USER username WITH PASSWORD 'password' [ROLE 'role']\n"); continue; } } - // For all other commands, require an active database - if (!db) { - printf("Error: No database is currently open.\n"); - printf("Create or open a database first with 'CREATE DATABASE name' or 'USE DATABASE name'\n"); - continue; - } - + // Prepare and execute regular SQL statements switch (prepare_statement(input_buf, &statement)) { case PREPARE_SUCCESS: // Add database reference to statement statement.db = db; + + // Check permissions based on statement type and user role + if (db->auth_required && !check_permission(db, &statement)) { + printf("Error: Permission denied for 
this operation.\n"); + printf("You don't have sufficient privileges. Please ask an admin for assistance.\n"); + continue; + } + break; case PREPARE_NEGATIVE_ID: printf("ID must be positive.\n"); @@ -122,6 +342,9 @@ int main(int argc, char *argv[]) { case EXECUTE_UNRECOGNIZED_STATEMENT: printf("Unrecognized statement at '%s'.\n", trimmed_input); break; + case EXECUTE_FAILED: + printf("Error: Command execution failed.\n"); + break; } } } diff --git a/src/acl.c b/src/acl.c new file mode 100644 index 0000000..6b806b5 --- /dev/null +++ b/src/acl.c @@ -0,0 +1,501 @@ +#include "../include/acl.h" +#include +#include +#include +#include +#include +#include + + +static void hash_password(const char* password, char* hash_out, size_t hash_size) { + EVP_MD_CTX* ctx = EVP_MD_CTX_new(); + unsigned char hash[EVP_MAX_MD_SIZE]; + unsigned int hash_len; + + EVP_DigestInit_ex(ctx, EVP_sha256(), NULL); + EVP_DigestUpdate(ctx, password, strlen(password)); + EVP_DigestFinal_ex(ctx, hash, &hash_len); + EVP_MD_CTX_free(ctx); + + // Convert binary hash to hex string + for (size_t i = 0; i < hash_len && i*2+1 < hash_size; i++) { + sprintf(hash_out + (i * 2), "%02x", hash[i]); + } + hash_out[hash_size-1] = '\0'; +} + +void acl_init(ACL* acl) { + acl->num_users = 0; + acl->num_user_roles = 0; + acl->num_active_sessions = 0; + + // Initialize current_user to empty string + acl->current_user[0] = '\0'; +} + +bool acl_save(ACL* acl, const char* db_name) { + char filename[512]; + snprintf(filename, sizeof(filename), "Database/%s/%s.acl", db_name, db_name); + + FILE* file = fopen(filename, "wb"); + if (!file) { + return false; + } + + // Write number of users + fwrite(&acl->num_users, sizeof(uint32_t), 1, file); + + // Write users + for (uint32_t i = 0; i < acl->num_users; i++) { + User* user = &acl->users[i]; + fwrite(user->username, MAX_USERNAME_SIZE, 1, file); + fwrite(user->password_hash, MAX_PASSWORD_SIZE, 1, file); + fwrite(&user->is_active, sizeof(bool), 1, file); + } + + // Write number of 
user roles + fwrite(&acl->num_user_roles, sizeof(uint32_t), 1, file); + + // Write user roles + for (uint32_t i = 0; i < acl->num_user_roles; i++) { + UserRole* role = &acl->user_roles[i]; + fwrite(role->username, MAX_USERNAME_SIZE, 1, file); + fwrite(&role->role, sizeof(RoleType), 1, file); + } + + fclose(file); + return true; +} + +bool acl_load(ACL* acl, const char* db_name) { + char filename[512]; + snprintf(filename, sizeof(filename), "Database/%s/%s.acl", db_name, db_name); + + FILE* file = fopen(filename, "rb"); + if (!file) { + // If file doesn't exist, initialize empty ACL + acl_init(acl); + return true; + } + + // Read number of users + if (fread(&acl->num_users, sizeof(uint32_t), 1, file) != 1) { + fclose(file); + return false; + } + + // Read users + for (uint32_t i = 0; i < acl->num_users; i++) { + User* user = &acl->users[i]; + if (fread(user->username, MAX_USERNAME_SIZE, 1, file) != 1 || + fread(user->password_hash, MAX_PASSWORD_SIZE, 1, file) != 1 || + fread(&user->is_active, sizeof(bool), 1, file) != 1) { + fclose(file); + return false; + } + } + + // Read number of user roles + if (fread(&acl->num_user_roles, sizeof(uint32_t), 1, file) != 1) { + fclose(file); + return false; + } + + // Read user roles + for (uint32_t i = 0; i < acl->num_user_roles; i++) { + UserRole* role = &acl->user_roles[i]; + if (fread(role->username, MAX_USERNAME_SIZE, 1, file) != 1 || + fread(&role->role, sizeof(RoleType), 1, file) != 1) { + fclose(file); + return false; + } + } + + fclose(file); + return true; +} + +bool acl_add_user(ACL* acl, const char* username, const char* password) { + // Check if user already exists + for (uint32_t i = 0; i < acl->num_users; i++) { + if (strcmp(acl->users[i].username, username) == 0) { + return false; + } + } + + // Check if we have room + if (acl->num_users >= MAX_USERS) { + return false; + } + + // Add the user + User* new_user = &acl->users[acl->num_users]; + strncpy(new_user->username, username, MAX_USERNAME_SIZE - 1); + 
new_user->username[MAX_USERNAME_SIZE - 1] = '\0'; + + // Hash and store password + hash_password(password, new_user->password_hash, MAX_PASSWORD_SIZE); + new_user->is_active = true; + + acl->num_users++; + return true; +} + +bool acl_delete_user(ACL* acl, const char* username) { + // Find the user + int user_idx = -1; + for (uint32_t i = 0; i < acl->num_users; i++) { + if (strcmp(acl->users[i].username, username) == 0) { + user_idx = i; + break; + } + } + + if (user_idx == -1) { + return false; + } + + // Remove user by shifting + for (uint32_t i = user_idx; i < acl->num_users - 1; i++) { + acl->users[i] = acl->users[i + 1]; + } + acl->num_users--; + + // Remove any associated roles + for (uint32_t i = 0; i < acl->num_user_roles; i++) { + if (strcmp(acl->user_roles[i].username, username) == 0) { + // Shift remaining roles + for (uint32_t j = i; j < acl->num_user_roles - 1; j++) { + acl->user_roles[j] = acl->user_roles[j + 1]; + } + acl->num_user_roles--; + i--; // Check this index again + } + } + + return true; +} + +bool acl_assign_role(ACL* acl, const char* username, RoleType role) { + // Check if trying to assign admin role when an admin already exists + if (role == ROLE_ADMIN) { + // Don't restrict the default admin user + if (strcmp(username, "admin") != 0) { + // Check if there's already an admin user + for (uint32_t i = 0; i < acl->num_user_roles; i++) { + if (acl->user_roles[i].role == ROLE_ADMIN) { + // There's already an admin user + printf("Error: Cannot create admin user '%s'. 
Only one admin user allowed.\n", username); + return false; + } + } + } + } + + // Find the user's current role entry (if any) + int slot = -1; + for (uint32_t i = 0; i < acl->num_user_roles; i++) { + if (strcmp(acl->user_roles[i].username, username) == 0) { + slot = i; + break; + } + } + + // If role entry not found, create a new one + if (slot == -1) { + if (acl->num_user_roles >= MAX_USERS) { + // No room for new role mapping + return false; + } + slot = acl->num_user_roles++; + } + + // Assign the role + strncpy(acl->user_roles[slot].username, username, MAX_USERNAME_SIZE - 1); + acl->user_roles[slot].username[MAX_USERNAME_SIZE - 1] = '\0'; + acl->user_roles[slot].role = role; + + return true; +} + +bool acl_remove_role(ACL* acl, const char* username) { + // Find role + int role_idx = -1; + for (uint32_t i = 0; i < acl->num_user_roles; i++) { + if (strcmp(acl->user_roles[i].username, username) == 0) { + role_idx = i; + break; + } + } + + if (role_idx == -1) { + return false; + } + + // Remove role by shifting + for (uint32_t i = role_idx; i < acl->num_user_roles - 1; i++) { + acl->user_roles[i] = acl->user_roles[i + 1]; + } + acl->num_user_roles--; + + return true; +} + +bool acl_authenticate(ACL* acl, const char* username, const char* password) { + // Find user + for (uint32_t i = 0; i < acl->num_users; i++) { + User* user = &acl->users[i]; + if (strcmp(user->username, username) == 0) { + // Check if active + if (!user->is_active) { + return false; + } + + // Check password + char hashed_input[MAX_PASSWORD_SIZE]; + hash_password(password, hashed_input, MAX_PASSWORD_SIZE); + + // For debugging + #ifdef DEBUG + printf("DEBUG: Provided username: %s, password: %s\n", username, password); + printf("DEBUG: Stored hash: %s\n", user->password_hash); + printf("DEBUG: Computed hash: %s\n", hashed_input); + #endif + + if (strcmp(user->password_hash, hashed_input) == 0) { + // Set current user + strncpy(acl->current_user, username, MAX_USERNAME_SIZE - 1); + 
acl->current_user[MAX_USERNAME_SIZE - 1] = '\0'; + + // Check if user is already in active sessions + for (uint32_t j = 0; j < acl->num_active_sessions; j++) { + if (strcmp(acl->active_sessions[j].username, username) == 0) { + // User is already logged in, just update the session time + acl->active_sessions[j].login_time = time(NULL); + return true; + } + } + + // Add user to active sessions if there's space + if (acl->num_active_sessions < MAX_ACTIVE_SESSIONS) { + UserSession* session = &acl->active_sessions[acl->num_active_sessions]; + strncpy(session->username, username, MAX_USERNAME_SIZE - 1); + session->username[MAX_USERNAME_SIZE - 1] = '\0'; + session->login_time = time(NULL); + session->is_active = true; + acl->num_active_sessions++; + return true; + } else { + printf("Warning: Maximum number of active sessions reached.\n"); + return false; + } + } + + return false; + } + } + + return false; +} + +// New function to check if a user is active +bool acl_is_user_active(ACL* acl, const char* username) { + for (uint32_t i = 0; i < acl->num_active_sessions; i++) { + if (strcmp(acl->active_sessions[i].username, username) == 0 && + acl->active_sessions[i].is_active) { + return true; + } + } + return false; +} + +// New function to logout a specific user +bool acl_logout_user(ACL* acl, const char* username) { + for (uint32_t i = 0; i < acl->num_active_sessions; i++) { + if (strcmp(acl->active_sessions[i].username, username) == 0) { + // Remove this session by shifting the rest down + for (uint32_t j = i; j < acl->num_active_sessions - 1; j++) { + acl->active_sessions[j] = acl->active_sessions[j + 1]; + } + acl->num_active_sessions--; + printf("User '%s' logged out.\n", username); + return true; + } + } + printf("User '%s' is not currently logged in.\n", username); + return false; +} + +// New function to list all active users +void acl_list_active_users(ACL* acl) { + if (acl->num_active_sessions == 0) { + printf("No active users.\n"); + return; + } + + printf("Active 
users (%u):\n", acl->num_active_sessions); + for (uint32_t i = 0; i < acl->num_active_sessions; i++) { + UserSession* session = &acl->active_sessions[i]; + + // Format login time + char time_buf[64]; + struct tm* tm_info = localtime(&session->login_time); + strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S", tm_info); + + printf(" - %s (logged in at %s)\n", session->username, time_buf); + } +} + +RoleType acl_get_user_role(const ACL* acl, const char* username) { + // Find user's role + for (uint32_t i = 0; i < acl->num_user_roles; i++) { + if (strcmp(acl->user_roles[i].username, username) == 0) { + return acl->user_roles[i].role; + } + } + + // Default to lowest privileges + return ROLE_USER; +} + +bool acl_has_permission(ACL* acl, const char* username, CommandType cmd_type) { + RoleType role = acl_get_user_role(acl, username); + + switch (role) { + case ROLE_ADMIN: + // Admin has all permissions + return true; + + case ROLE_DEVELOPER: + // Developer can create tables and write data, but can't delete or perform admin operations + return (cmd_type == CMD_READ || cmd_type == CMD_WRITE || cmd_type == CMD_CREATE); + + case ROLE_USER: + // User can only read + return cmd_type == CMD_READ; + + default: + return false; + } +} + +bool acl_is_admin(const ACL* acl, const char* username) { + return acl_get_user_role(acl, username) == ROLE_ADMIN; +} + +// Implementation of acl_login function +bool acl_login(ACL* acl, const char* username, const char* password) { + // Find user + int user_idx = -1; + for (uint32_t i = 0; i < acl->num_users; i++) { + if (strcmp(acl->users[i].username, username) == 0) { + user_idx = i; + break; + } + } + + if (user_idx == -1) { + return false; // User not found + } + + User* user = &acl->users[user_idx]; + + // Check if user is active + if (!user->is_active) { + return false; + } + + // Check password + char hashed_input[MAX_PASSWORD_SIZE]; + hash_password(password, hashed_input, MAX_PASSWORD_SIZE); + + if (strcmp(user->password_hash, 
hashed_input) == 0) { + // Set as current user + strncpy(acl->current_user, username, MAX_USERNAME_SIZE - 1); + acl->current_user[MAX_USERNAME_SIZE - 1] = '\0'; + + // Add or update session + bool found = false; + for (uint32_t i = 0; i < acl->num_active_sessions; i++) { + if (strcmp(acl->active_sessions[i].username, username) == 0) { + // Update existing session + acl->active_sessions[i].login_time = time(NULL); + acl->active_sessions[i].is_active = true; + found = true; + break; + } + } + + if (!found && acl->num_active_sessions < MAX_ACTIVE_SESSIONS) { + // Add new session + UserSession* session = &acl->active_sessions[acl->num_active_sessions]; + strncpy(session->username, username, MAX_USERNAME_SIZE - 1); + session->username[MAX_USERNAME_SIZE - 1] = '\0'; + session->login_time = time(NULL); + session->is_active = true; + acl->num_active_sessions++; + } + + return true; + } + + return false; +} + +// Function to create the initial admin user for a new database +void acl_create_admin(ACL* acl, const char* username, const char* password) { + // Find an empty slot for the new user + int slot = -1; + for (int i = 0; i < MAX_USERS; i++) { + if (!acl->users[i].is_active) { + slot = i; + break; + } + } + + if (slot == -1) { + // No empty slots available + return; + } + + // Initialize the admin user + acl->users[slot].is_active = true; + strncpy(acl->users[slot].username, username, MAX_USERNAME_SIZE - 1); + acl->users[slot].username[MAX_USERNAME_SIZE - 1] = '\0'; + + // Set password hash (hash the password) + hash_password(password, acl->users[slot].password_hash, MAX_PASSWORD_SIZE); + + // Add a role mapping for this user + int role_slot = -1; + for (int i = 0; i < MAX_USERS; i++) { + if (acl->user_roles[i].username[0] == '\0' || + strcmp(acl->user_roles[i].username, username) == 0) { + role_slot = i; + break; + } + } + + if (role_slot != -1) { + strncpy(acl->user_roles[role_slot].username, username, MAX_USERNAME_SIZE - 1); + 
acl->user_roles[role_slot].username[MAX_USERNAME_SIZE - 1] = '\0'; + acl->user_roles[role_slot].role = ROLE_ADMIN; + + if (acl->num_user_roles <= role_slot) { + acl->num_user_roles = role_slot + 1; + } + } + + // Update user count if needed + if (acl->num_users <= slot) { + acl->num_users = slot + 1; + } +} + +// Add a simplified logout function +void acl_logout(ACL* acl) { + // Clear the current user's name + acl->current_user[0] = '\0'; + + // Keep the session tracking intact for reconnection +} \ No newline at end of file diff --git a/src/command_processor.c b/src/command_processor.c index 6b5f257..43583fd 100644 --- a/src/command_processor.c +++ b/src/command_processor.c @@ -6,6 +6,7 @@ #include #include #include +#include // Add this for isspace() function void print_constants() { @@ -81,6 +82,9 @@ MetaCommandResult do_meta_command(Input_Buffer *buf, Database *db) { printf("Constants:\n"); print_constants(); return META_COMMAND_SUCCESS; + } else if (strcmp(buf->buffer, ".users") == 0) { + acl_list_active_users(&db->acl); + return META_COMMAND_SUCCESS; } return META_COMMAND_UNRECOGNIZED_COMMAND; @@ -122,7 +126,11 @@ PrepareResult prepare_insert(Input_Buffer *buf, Statement *statement) // Now extract the values - find opening parenthesis after VALUES char *open_paren = strchr(values_keyword, '('); if (!open_paren) { - return PREPARE_SYNTAX_ERROR; + // Check for alternative format: "insert into users (1, "abdul")" + open_paren = strchr(into_keyword, '('); + if (!open_paren || open_paren < values_keyword) { + return PREPARE_SYNTAX_ERROR; + } } // Find closing parenthesis @@ -221,35 +229,163 @@ PrepareResult prepare_insert(Input_Buffer *buf, Statement *statement) return PREPARE_SUCCESS; } else { - // Old syntax: insert 1 username email - // Use the existing implementation - char *id_string = strtok(buf->buffer, " "); // Will get "insert" - id_string = strtok(NULL, " "); // Get the actual ID - char *username = strtok(NULL, " "); - char *email = strtok(NULL, " "); + 
// Old syntax or partial new syntax: look for parentheses directly after table name + char *open_paren = strchr(sql, '('); + if (open_paren) { + // This looks like "insert into users (1, "abdul")" format + + // Extract table name before the parenthesis + char table_name[MAX_TABLE_NAME]; + char *table_start = sql + 6; // Skip "insert" + while (*table_start == ' ') table_start++; // Skip spaces + + // Check if "into" is present + if (strncasecmp(table_start, "into", 4) == 0) { + table_start += 4; // Skip "into" + while (*table_start == ' ') table_start++; // Skip spaces + } + + // Find end of table name (space or parenthesis) + char *table_end = table_start; + while (*table_end && *table_end != ' ' && *table_end != '(') table_end++; + + int table_name_len = table_end - table_start; + if (table_name_len <= 0 || table_name_len >= MAX_TABLE_NAME) { + return PREPARE_SYNTAX_ERROR; + } + + strncpy(table_name, table_start, table_name_len); + table_name[table_name_len] = '\0'; + + // Store table name in statement + strncpy(statement->table_name, table_name, MAX_TABLE_NAME - 1); + statement->table_name[MAX_TABLE_NAME - 1] = '\0'; + + // Find closing parenthesis + char *close_paren = strchr(open_paren, ')'); + if (!close_paren) { + return PREPARE_SYNTAX_ERROR; + } + + // Process values inside parentheses just like the other format + statement->num_values = 0; + statement->values = NULL; + + char *value_str = open_paren + 1; + while (value_str < close_paren && statement->num_values < MAX_COLUMNS) { + while (*value_str == ' ' || *value_str == '\t') value_str++; // Skip spaces + + if (value_str >= close_paren) break; + + // Allocate space for the new value + statement->values = realloc(statement->values, + (statement->num_values + 1) * sizeof(char*)); + if (!statement->values) { + return PREPARE_SYNTAX_ERROR; + } + + // Handle quoted strings + if (*value_str == '"' || *value_str == '\'') { + char quote_char = *value_str; + value_str++; // Skip opening quote + + // Find closing 
quote + char *end_quote = strchr(value_str, quote_char); + if (!end_quote || end_quote >= close_paren) { + return PREPARE_SYNTAX_ERROR; + } + + int value_len = end_quote - value_str; + char *value = malloc(value_len + 1); + if (!value) return PREPARE_SYNTAX_ERROR; + + strncpy(value, value_str, value_len); + value[value_len] = '\0'; + + statement->values[statement->num_values++] = value; + value_str = end_quote + 1; + } else { + // Handle non-quoted values (numbers, etc.) + char *comma = strchr(value_str, ','); + if (!comma || comma > close_paren) comma = close_paren; + + int value_len = comma - value_str; + while (value_len > 0 && (value_str[value_len-1] == ' ' || value_str[value_len-1] == '\t')) + value_len--; // Trim trailing spaces + + char *value = malloc(value_len + 1); + if (!value) return PREPARE_SYNTAX_ERROR; + + strncpy(value, value_str, value_len); + value[value_len] = '\0'; + + statement->values[statement->num_values++] = value; + value_str = comma; + } + + // Skip comma if present + if (*value_str == ',') value_str++; + } + + // For backward compatibility, still populate the old row_to_insert structure + if (statement->num_values >= 1) { + statement->row_to_insert.id = atoi(statement->values[0]); + // Check for negative ID - must check after conversion to int + if (atoi(statement->values[0]) < 0) { + for (uint32_t i = 0; i < statement->num_values; i++) { + free(statement->values[i]); + } + free(statement->values); + return PREPARE_NEGATIVE_ID; + } + } + + if (statement->num_values >= 2) { + strncpy(statement->row_to_insert.username, + statement->values[1], + COLUMN_USERNAME_SIZE); + statement->row_to_insert.username[COLUMN_USERNAME_SIZE] = '\0'; + } + + if (statement->num_values >= 3) { + strncpy(statement->row_to_insert.email, + statement->values[2], + COLUMN_EMAIL_SIZE); + statement->row_to_insert.email[COLUMN_EMAIL_SIZE] = '\0'; + } + + return PREPARE_SUCCESS; + } else { + // Old syntax: insert 1 username email + // Use the existing implementation + 
char *id_string = strtok(buf->buffer, " "); // Will get "insert" + id_string = strtok(NULL, " "); // Get the actual ID + char *username = strtok(NULL, " "); + char *email = strtok(NULL, " "); - if (id_string == NULL || username == NULL || email == NULL) { - return PREPARE_SYNTAX_ERROR; - } + if (id_string == NULL || username == NULL || email == NULL) { + return PREPARE_SYNTAX_ERROR; + } - int id = atoi(id_string); - if (id < 0) { - return PREPARE_NEGATIVE_ID; - } - - if (strlen(username) > COLUMN_USERNAME_SIZE) { - return PREPARE_STRING_TOO_LONG; - } - - if (strlen(email) > COLUMN_EMAIL_SIZE) { - return PREPARE_STRING_TOO_LONG; - } + int id = atoi(id_string); + if (id < 0) { + return PREPARE_NEGATIVE_ID; + } + + if (strlen(username) > COLUMN_USERNAME_SIZE) { + return PREPARE_STRING_TOO_LONG; + } + + if (strlen(email) > COLUMN_EMAIL_SIZE) { + return PREPARE_STRING_TOO_LONG; + } - statement->row_to_insert.id = id; - strcpy(statement->row_to_insert.username, username); - strcpy(statement->row_to_insert.email, email); - - return PREPARE_SUCCESS; + statement->row_to_insert.id = id; + strcpy(statement->row_to_insert.username, username); + strcpy(statement->row_to_insert.email, email); + + return PREPARE_SUCCESS; + } } } @@ -463,6 +599,33 @@ PrepareResult prepare_statement(Input_Buffer *buf, Statement *statement) else if (strncasecmp(buf->buffer, "show tables", 11) == 0) { return prepare_show_tables(buf, statement); } + else if (strncasecmp(buf->buffer, "create user", 11) == 0) { + return prepare_create_user(buf, statement); + } + else if (strncasecmp(buf->buffer, "drop user", 9) == 0) { + return prepare_drop_user(buf, statement); + } + else if (strncasecmp(buf->buffer, "grant role", 10) == 0) { + return prepare_grant_role(buf, statement); + } + else if (strncasecmp(buf->buffer, "revoke role", 11) == 0) { + return prepare_revoke_role(buf, statement); + } + else if (strncasecmp(buf->buffer, "login", 5) == 0) { + return prepare_login(buf, statement); + } + else if 
(strncasecmp(buf->buffer, "logout", 6) == 0) { + statement->type = STATEMENT_LOGOUT; + return PREPARE_SUCCESS; + } + else if (strncasecmp(buf->buffer, "enable auth", 11) == 0) { + statement->type = STATEMENT_ENABLE_AUTH; + return PREPARE_SUCCESS; + } + else if (strncasecmp(buf->buffer, "disable auth", 12) == 0) { + statement->type = STATEMENT_DISABLE_AUTH; + return PREPARE_SUCCESS; + } return PREPARE_UNRECOGNIZED_STATEMENT; } @@ -937,6 +1100,39 @@ ExecuteResult execute_show_tables(Statement* statement, Database* db) { // Add the execute statement implementation ExecuteResult execute_statement(Statement *statement, Database *db) { + // Handle ACL-specific commands first + switch (statement->type) { + case STATEMENT_CREATE_USER: + return execute_create_user(statement, db); + + case STATEMENT_DROP_USER: + return execute_drop_user(statement, db); + + case STATEMENT_GRANT_ROLE: + return execute_grant_role(statement, db); + + case STATEMENT_REVOKE_ROLE: + return execute_revoke_role(statement, db); + + case STATEMENT_LOGIN: + return execute_login(statement, db); + + case STATEMENT_LOGOUT: + return execute_logout(statement, db); + + case STATEMENT_ENABLE_AUTH: + return execute_enable_auth(statement, db); + + case STATEMENT_DISABLE_AUTH: + return execute_disable_auth(statement, db); + } + + // For non-ACL commands, check authentication if required + if (db->auth_required && db->acl.current_user[0] == '\0') { + printf("Error: Authentication required. 
Please login first.\n"); + return EXECUTE_UNRECOGNIZED_STATEMENT; + } + // Check if we need to switch tables first, but skip this for CREATE TABLE if (statement->table_name[0] != '\0' && statement->type != STATEMENT_CREATE_TABLE) { // A table was specified in the query @@ -1092,4 +1288,459 @@ ExecuteResult execute_database_statement(Statement *statement, Database **db_ptr default: return EXECUTE_UNRECOGNIZED_STATEMENT; } +} + +// Add these functions after execute_show_tables +ExecuteResult execute_create_user(Statement *statement, Database *db) { + // Only admin can create users + if (db->auth_required && !acl_is_admin(&db->acl, db->acl.current_user)) { + printf("Error: Only admin users can create new users.\n"); + return EXECUTE_UNRECOGNIZED_STATEMENT; + } + + // Add user to ACL + if (!acl_add_user(&db->acl, statement->user.username, statement->user.password)) { + printf("Error: Failed to create user '%s'. User might already exist.\n", + statement->user.username); + return EXECUTE_FAILED; + } + + // Assign role (if specified) + if (statement->user.role_specified) { + RoleType role; + if (strcasecmp(statement->user.role_str, "admin") == 0) { + role = ROLE_ADMIN; + } else if (strcasecmp(statement->user.role_str, "developer") == 0) { + role = ROLE_DEVELOPER; + } else { + role = ROLE_USER; + } + + if (!acl_assign_role(&db->acl, statement->user.username, role)) { + // If role assignment fails but it's an admin role, give a specific message + if (role == ROLE_ADMIN) { + printf("Error: Cannot create admin user '%s'. 
Only one admin user allowed.\n", + statement->user.username); + // Clean up by removing the user we just created + acl_delete_user(&db->acl, statement->user.username); + return EXECUTE_FAILED; + } else { + printf("Error: Failed to assign role to user '%s'.\n", statement->user.username); + return EXECUTE_FAILED; + } + } + } + + // printf("User '%s' created successfully.\n", statement->user.username); + return EXECUTE_SUCCESS; +} + +ExecuteResult execute_drop_user(Statement *statement, Database *db) { + // Only admin can drop users + if (db->auth_required && !acl_is_admin(&db->acl, db->acl.current_user)) { + printf("Error: Only admin users can drop users.\n"); + return EXECUTE_UNRECOGNIZED_STATEMENT; + } + + // Don't allow dropping current user + if (strcmp(statement->user.username, db->acl.current_user) == 0) { + printf("Error: Cannot drop currently logged in user.\n"); + return EXECUTE_UNRECOGNIZED_STATEMENT; + } + + if (acl_delete_user(&db->acl, statement->user.username)) { + printf("User '%s' dropped successfully.\n", statement->user.username); + acl_save(&db->acl, db->name); + return EXECUTE_SUCCESS; + } else { + printf("Failed to drop user '%s'. 
User may not exist.\n", statement->user.username); + return EXECUTE_UNRECOGNIZED_STATEMENT; + } +} + +ExecuteResult execute_grant_role(Statement *statement, Database *db) { + // Only admin can grant roles + if (db->auth_required && !acl_is_admin(&db->acl, db->acl.current_user)) { + printf("Error: Only admin users can grant roles.\n"); + return EXECUTE_UNRECOGNIZED_STATEMENT; + } + + if (acl_assign_role(&db->acl, statement->user.username, statement->user.role)) { + const char* role_name; + switch (statement->user.role) { + case ROLE_ADMIN: role_name = "Admin"; break; + case ROLE_DEVELOPER: role_name = "Developer"; break; + case ROLE_USER: role_name = "User"; break; + default: role_name = "Unknown"; break; + } + + printf("Granted role '%s' to user '%s'.\n", role_name, statement->user.username); + acl_save(&db->acl, db->name); + return EXECUTE_SUCCESS; + } else { + printf("Failed to grant role to user '%s'. User may not exist.\n", statement->user.username); + return EXECUTE_UNRECOGNIZED_STATEMENT; + } +} + +ExecuteResult execute_revoke_role(Statement *statement, Database *db) { + // Only admin can revoke roles + if (db->auth_required && !acl_is_admin(&db->acl, db->acl.current_user)) { + printf("Error: Only admin users can revoke roles.\n"); + return EXECUTE_UNRECOGNIZED_STATEMENT; + } + + // Don't allow revoking from current user + if (strcmp(statement->user.username, db->acl.current_user) == 0) { + printf("Error: Cannot revoke role from currently logged in user.\n"); + return EXECUTE_UNRECOGNIZED_STATEMENT; + } + + if (acl_remove_role(&db->acl, statement->user.username)) { + printf("Revoked role from user '%s'.\n", statement->user.username); + acl_save(&db->acl, db->name); + return EXECUTE_SUCCESS; + } else { + printf("Failed to revoke role from user '%s'. 
User may not exist or have no role.\n", statement->user.username); + return EXECUTE_UNRECOGNIZED_STATEMENT; + } +} + +ExecuteResult execute_login(Statement *statement, Database *db) { + if (acl_authenticate(&db->acl, statement->user.username, statement->user.password)) { + printf("Logged in as '%s'.\n", statement->user.username); + return EXECUTE_SUCCESS; + } else { + printf("Invalid username or password.\n"); + return EXECUTE_UNRECOGNIZED_STATEMENT; + } +} + +ExecuteResult execute_logout(Statement *statement, Database *db) { + if (statement->user.username[0] != '\0') { + // Logout specific user + return acl_logout_user(&db->acl, statement->user.username) ? + EXECUTE_SUCCESS : EXECUTE_UNRECOGNIZED_STATEMENT; + } else { + // Need to determine which user is executing this command + printf("Error: Please specify which user to logout.\n"); + printf("Usage: LOGOUT username\n"); + return EXECUTE_UNRECOGNIZED_STATEMENT; + } +} + +ExecuteResult execute_enable_auth(Statement *statement, Database *db) { + (void)statement; // Unused parameter + + db_enable_auth(db); + return EXECUTE_SUCCESS; +} + +ExecuteResult execute_disable_auth(Statement *statement, Database *db) { + (void)statement; // Unused parameter + + // Only admin can disable auth + if (db->auth_required && !acl_is_admin(&db->acl, db->acl.current_user)) { + printf("Error: Only admin users can disable authentication.\n"); + return EXECUTE_UNRECOGNIZED_STATEMENT; + } + + db_disable_auth(db); + return EXECUTE_SUCCESS; +} + +// Parse CREATE USER command +PrepareResult prepare_create_user(Input_Buffer *buf, Statement *statement) { + statement->type = STATEMENT_CREATE_USER; + + // Format: CREATE USER username WITH PASSWORD 'password' [ROLE 'role'] + char *sql = buf->buffer; + char *username_start = strcasestr(sql, "user") + 4; + while (*username_start == ' ') username_start++; + + // Find the end of username (before WITH) + char *username_end = strcasestr(username_start, "with"); + if (!username_end) { + return 
PREPARE_SYNTAX_ERROR; + } + + // Extract username + int username_len = username_end - username_start; + while (username_len > 0 && isspace(username_start[username_len-1])) username_len--; + if (username_len <= 0 || username_len >= MAX_USERNAME_SIZE) { + return PREPARE_SYNTAX_ERROR; + } + + strncpy(statement->user.username, username_start, username_len); + statement->user.username[username_len] = '\0'; + + // Find PASSWORD + char *pwd_keyword = strcasestr(username_end, "password"); + if (!pwd_keyword) { + return PREPARE_SYNTAX_ERROR; + } + + // Skip to password value (after PASSWORD) + char *pwd_start = pwd_keyword + 8; + while (*pwd_start == ' ') pwd_start++; + + // Initialize role fields + statement->user.role_specified = false; + statement->user.role = ROLE_USER; // default role + + // Handle quoted password + if (*pwd_start == '\'' || *pwd_start == '\"') { + char quote = *pwd_start; + pwd_start++; // Skip opening quote + char *pwd_end = strchr(pwd_start, quote); + if (!pwd_end) { + return PREPARE_SYNTAX_ERROR; + } + + int pwd_len = pwd_end - pwd_start; + if (pwd_len <= 0 || pwd_len >= MAX_PASSWORD_SIZE) { + return PREPARE_SYNTAX_ERROR; + } + + strncpy(statement->user.password, pwd_start, pwd_len); + statement->user.password[pwd_len] = '\0'; + + // Check for optional ROLE + char *role_keyword = strcasestr(pwd_end, "role"); + if (role_keyword) { + statement->user.role_specified = true; + char *role_start = role_keyword + 4; + while (*role_start == ' ') role_start++; + + // Handle quoted role + if (*role_start == '\'' || *role_start == '\"') { + char quote = *role_start; + role_start++; // Skip opening quote + char *role_end = strchr(role_start, quote); + if (!role_end) { + return PREPARE_SYNTAX_ERROR; + } + + int role_len = role_end - role_start; + strncpy(statement->user.role_str, role_start, role_len); + statement->user.role_str[role_len] = '\0'; + + // Convert role name to enum + if (strcasecmp(statement->user.role_str, "admin") == 0) { + statement->user.role = 
ROLE_ADMIN; + } else if (strcasecmp(statement->user.role_str, "developer") == 0) { + statement->user.role = ROLE_DEVELOPER; + } else if (strcasecmp(statement->user.role_str, "user") == 0) { + statement->user.role = ROLE_USER; + } else { + return PREPARE_SYNTAX_ERROR; + } + } + } + + return PREPARE_SUCCESS; + } + + return PREPARE_SYNTAX_ERROR; +} + +// Parse DROP USER command +PrepareResult prepare_drop_user(Input_Buffer *buf, Statement *statement) { + statement->type = STATEMENT_DROP_USER; + + // Parse: DROP USER username + char *token = strtok(buf->buffer, " \t"); // Skip "DROP" + token = strtok(NULL, " \t"); // Skip "USER" + token = strtok(NULL, " \t"); // Get username + + if (!token) { + return PREPARE_SYNTAX_ERROR; + } + + strncpy(statement->user.username, token, MAX_USERNAME_SIZE - 1); + statement->user.username[MAX_USERNAME_SIZE - 1] = '\0'; + + return PREPARE_SUCCESS; +} + +// Parse GRANT ROLE command +PrepareResult prepare_grant_role(Input_Buffer *buf, Statement *statement) { + statement->type = STATEMENT_GRANT_ROLE; + + // Parse: GRANT ROLE role TO username + char *token = strtok(buf->buffer, " \t"); // Skip "GRANT" + token = strtok(NULL, " \t"); // Skip "ROLE" + token = strtok(NULL, " \t"); // Get role + + if (!token) { + return PREPARE_SYNTAX_ERROR; + } + + if (strcasecmp(token, "admin") == 0) { + statement->user.role = ROLE_ADMIN; + } else if (strcasecmp(token, "developer") == 0) { + statement->user.role = ROLE_DEVELOPER; + } else if (strcasecmp(token, "user") == 0) { + statement->user.role = ROLE_USER; + } else { + return PREPARE_SYNTAX_ERROR; + } + + token = strtok(NULL, " \t"); // Skip "TO" + if (!token || strcasecmp(token, "to") != 0) { + return PREPARE_SYNTAX_ERROR; + } + + token = strtok(NULL, " \t"); // Get username + if (!token) { + return PREPARE_SYNTAX_ERROR; + } + + strncpy(statement->user.username, token, MAX_USERNAME_SIZE - 1); + statement->user.username[MAX_USERNAME_SIZE - 1] = '\0'; + + return PREPARE_SUCCESS; +} + +// Parse REVOKE ROLE 
command +PrepareResult prepare_revoke_role(Input_Buffer *buf, Statement *statement) { + statement->type = STATEMENT_REVOKE_ROLE; + + // Parse: REVOKE ROLE FROM username + char *token = strtok(buf->buffer, " \t"); // Skip "REVOKE" + token = strtok(NULL, " \t"); // Skip "ROLE" + + if (!token || strcasecmp(token, "role") != 0) { + return PREPARE_SYNTAX_ERROR; + } + + token = strtok(NULL, " \t"); // Skip "FROM" + if (!token || strcasecmp(token, "from") != 0) { + return PREPARE_SYNTAX_ERROR; + } + + token = strtok(NULL, " \t"); // Get username + if (!token) { + return PREPARE_SYNTAX_ERROR; + } + + strncpy(statement->user.username, token, MAX_USERNAME_SIZE - 1); + statement->user.username[MAX_USERNAME_SIZE - 1] = '\0'; + + return PREPARE_SUCCESS; +} + +// Parse LOGIN command +PrepareResult prepare_login(Input_Buffer *buf, Statement *statement) { + statement->type = STATEMENT_LOGIN; + + // Parse: LOGIN username 'password' + char *sql = buf->buffer; + char *username_start = sql + 5; // Skip "LOGIN" + while (*username_start == ' ') username_start++; + + // Find end of username (space before password) + char *username_end = username_start; + while (*username_end && !isspace(*username_end)) username_end++; + + if (username_end == username_start) { + return PREPARE_SYNTAX_ERROR; + } + + // Extract username + int username_len = username_end - username_start; + strncpy(statement->user.username, username_start, username_len); + statement->user.username[username_len] = '\0'; + + // Find password (should be quoted) + char *pwd_start = username_end; + while (*pwd_start && isspace(*pwd_start)) pwd_start++; + + if (*pwd_start == '\'' || *pwd_start == '\"') { + char quote = *pwd_start; + pwd_start++; // Skip opening quote + char *pwd_end = strchr(pwd_start, quote); + if (!pwd_end) { + return PREPARE_SYNTAX_ERROR; + } + + int pwd_len = pwd_end - pwd_start; + strncpy(statement->user.password, pwd_start, pwd_len); + statement->user.password[pwd_len] = '\0'; + + return PREPARE_SUCCESS; + } 
+ + return PREPARE_SYNTAX_ERROR; +} + +// Parse LOGOUT command +PrepareResult prepare_logout(Input_Buffer *buf, Statement *statement) { + statement->type = STATEMENT_LOGOUT; + + // Check if a specific username is provided + char* token = strtok(buf->buffer, " \t"); // Skip "LOGOUT" + token = strtok(NULL, " \t"); // Get username if provided + + if (token) { + // Logout a specific user + strncpy(statement->user.username, token, MAX_USERNAME_SIZE - 1); + statement->user.username[MAX_USERNAME_SIZE - 1] = '\0'; + } else { + // No username specified, will logout the current session's user (determined later) + statement->user.username[0] = '\0'; + } + + return PREPARE_SUCCESS; +} + +// Add this function after the existing functions +bool check_permission(Database* db, Statement* statement) { + // Admin can do anything + if (acl_is_admin(&db->acl, db->acl.current_user)) { + return true; + } + + // Map statement type to command type for permission checking + CommandType cmd_type; + switch (statement->type) { + case STATEMENT_SELECT: + case STATEMENT_SELECT_BY_ID: + case STATEMENT_SHOW_TABLES: + cmd_type = CMD_READ; + break; + + case STATEMENT_INSERT: + case STATEMENT_UPDATE: + cmd_type = CMD_WRITE; + break; + + case STATEMENT_DELETE: + cmd_type = CMD_DELETE; // Map DELETE to its own command type + break; + + case STATEMENT_CREATE_TABLE: + case STATEMENT_CREATE_DATABASE: + cmd_type = CMD_CREATE; + break; + + case STATEMENT_DROP_USER: + cmd_type = CMD_DROP; + break; + + case STATEMENT_GRANT_ROLE: + cmd_type = CMD_GRANT; + break; + + case STATEMENT_REVOKE_ROLE: + cmd_type = CMD_REVOKE; + break; + + default: + // For any other command types, delegate to the acl_has_permission + return acl_has_permission(&db->acl, db->acl.current_user, CMD_READ); + } + + // Check if user has permission for this command type + return acl_has_permission(&db->acl, db->acl.current_user, cmd_type); } \ No newline at end of file diff --git a/src/database.c b/src/database.c index 61deeb1..3ebb9c9 
100644 --- a/src/database.c +++ b/src/database.c @@ -78,7 +78,24 @@ Database* db_create_database(const char* name) { } // Open or create the database - return db_open_database(name); + Database* db = db_open_database(name); + if (db) { + db_init_transactions(db, 10); // Support up to 10 concurrent transactions + } + + // Initialize ACL + acl_init(&db->acl); + + // Create default admin user with the correct password 'jhaz' + acl_create_admin(&db->acl, "admin", "jhaz"); + + // Save ACL + acl_save(&db->acl, name); + + // By default, auth is required + db->auth_required = true; + + return db; } Database* db_open_database(const char* name) { @@ -154,6 +171,18 @@ Database* db_open_database(const char* name) { } } + db_init_transactions(db, 10); // Support up to 10 concurrent transactions + // Load ACL + if (!acl_load(&db->acl, name)) { + // If ACL loading fails, create a default one + acl_init(&db->acl); + acl_create_admin(&db->acl, "admin", "admin"); + acl_save(&db->acl, name); + } + + // By default, auth is required + db->auth_required = true; + return db; } @@ -232,10 +261,19 @@ bool db_use_table(Database* db, const char* table_name) { } void db_close_database(Database* db) { - if (db == NULL) { - return; + if (!db) return; + + // Rollback any active transaction + if (db->active_txn_id != 0) { + printf("Warning: Rolling back active transaction %u before closing database.\n", + db->active_txn_id); + txn_rollback(&db->txn_manager, db->active_txn_id); + db->active_txn_id = 0; } + // Free transaction manager resources + txn_manager_free(&db->txn_manager); + // Save current active table's root page number if (db->active_table) { TableDef* table_def = catalog_get_active_table(&db->catalog); @@ -250,6 +288,9 @@ void db_close_database(Database* db) { // Save catalog before closing catalog_save(&db->catalog, db->name); + // Save ACL before closing + acl_save(&db->acl, db->name); + free(db); } @@ -268,4 +309,95 @@ bool catalog_save_to_database(Catalog* catalog, const char* 
db_name) { // Implement this function or return false fclose(file); return false; // Not implemented +} + +void db_init_transactions(Database* db, uint32_t capacity) { + if (!db) return; + txn_manager_init(&db->txn_manager, capacity); + db->active_txn_id = 0; +} + +uint32_t db_begin_transaction(Database* db) { + if (!db) return 0; + + // If there's already an active transaction, use that + if (db->active_txn_id != 0 && txn_is_active(&db->txn_manager, db->active_txn_id)) { + printf("Using existing transaction %u\n", db->active_txn_id); + return db->active_txn_id; + } + + uint32_t txn_id = txn_begin(&db->txn_manager); + if (txn_id != 0) { + db->active_txn_id = txn_id; + } + + return txn_id; +} + +bool db_commit_transaction(Database* db) { + if (!db || db->active_txn_id == 0) { + printf("No active transaction to commit.\n"); + return false; + } + + bool success = txn_commit(&db->txn_manager, db->active_txn_id); + if (success) { + db->active_txn_id = 0; + } + + return success; +} + +bool db_rollback_transaction(Database* db) { + if (!db || db->active_txn_id == 0) { + printf("No active transaction to rollback.\n"); + return false; + } + + bool success = txn_rollback(&db->txn_manager, db->active_txn_id); + if (success) { + db->active_txn_id = 0; + } + + return success; +} + +bool db_set_active_transaction(Database* db, uint32_t txn_id) { + if (!db) return false; + + if (txn_id == 0 || !txn_is_active(&db->txn_manager, txn_id)) { + printf("Invalid transaction ID or transaction not active.\n"); + return false; + } + + db->active_txn_id = txn_id; + return true; +} + +void db_enable_transactions(Database* db) { + if (!db) return; + txn_manager_enable(&db->txn_manager); +} + +void db_disable_transactions(Database* db) { + if (!db) return; + + // If there's an active transaction, rollback first + if (db->active_txn_id != 0) { + db_rollback_transaction(db); + } + + txn_manager_disable(&db->txn_manager); +} + +void db_enable_auth(Database* db) { + if (!db) return; + 
db->auth_required = true; + printf("Authentication enabled for database '%s'.\n", db->name); +} + +void db_disable_auth(Database* db) { + if (!db) return; + db->auth_required = false; + printf("Authentication disabled for database '%s'.\n", db->name); } \ No newline at end of file diff --git a/src/input_handling.c b/src/input_handling.c index 4c845b0..cf88828 100644 --- a/src/input_handling.c +++ b/src/input_handling.c @@ -6,6 +6,7 @@ Input_Buffer *newInputBuffer() { Input_Buffer *buf = (Input_Buffer *)malloc(sizeof(Input_Buffer)); buf->buffer = NULL; buf->buffer_length = buf->input_length = 0; + buf->prompt_displayed = false; // Initialize prompt_displayed return buf; } @@ -35,7 +36,11 @@ void read_input(Input_Buffer *buf) { buf->buffer_length = buffer_size; } - print_prompt(); + // Only show prompt if it hasn't been displayed yet + if (!buf->prompt_displayed) { + print_prompt(); + buf->prompt_displayed = true; // Set the flag to true after displaying + } size_t position = 0; int c; diff --git a/src/queue.c b/src/queue.c index 40d648b..70a010c 100644 --- a/src/queue.c +++ b/src/queue.c @@ -11,7 +11,7 @@ bool queue_enqueue(Queue* queue, void* data, uint32_t page_num, uint32_t level) QueueNode* new_node = (QueueNode*)malloc(sizeof(QueueNode)); if (new_node == NULL) { return false; - }Z + } new_node->data = data; new_node->page_num = page_num; new_node->level = level; diff --git a/src/table.c b/src/table.c index 4d6b1ce..9db0279 100644 --- a/src/table.c +++ b/src/table.c @@ -799,7 +799,7 @@ void print_dynamic_row(DynamicRow* row, TableDef* table_def) { printf("%d", dynamic_row_get_int(row, table_def, i)); break; case COLUMN_TYPE_FLOAT: - printf("%f", dynamic_row_get_float(row, table_def, i)); + printf("%.2f", dynamic_row_get_float(row, table_def, i)); break; case COLUMN_TYPE_BOOLEAN: printf("%s", dynamic_row_get_boolean(row, table_def, i) ? 
"TRUE" : "FALSE"); @@ -834,3 +834,42 @@ void print_dynamic_row(DynamicRow* row, TableDef* table_def) { printf(")\n"); } + +void print_dynamic_column(DynamicRow* row, TableDef* table_def, uint32_t col_idx) { + if (col_idx >= table_def->num_columns) { + printf("ERROR"); + return; + } + + ColumnDef* col = &table_def->columns[col_idx]; + + switch (col->type) { + case COLUMN_TYPE_INT: + printf("%d", dynamic_row_get_int(row, table_def, col_idx)); + break; + case COLUMN_TYPE_STRING: + printf("%s", dynamic_row_get_string(row, table_def, col_idx)); + break; + case COLUMN_TYPE_FLOAT: + printf("%.2f", dynamic_row_get_float(row, table_def, col_idx)); + break; + case COLUMN_TYPE_BOOLEAN: + printf("%s", dynamic_row_get_boolean(row, table_def, col_idx) ? "true" : "false"); + break; + case COLUMN_TYPE_DATE: + printf("%d", dynamic_row_get_date(row, table_def, col_idx)); + break; + case COLUMN_TYPE_TIME: + printf("%d", dynamic_row_get_time(row, table_def, col_idx)); + break; + case COLUMN_TYPE_TIMESTAMP: + printf("%ld", dynamic_row_get_timestamp(row, table_def, col_idx)); + break; + case COLUMN_TYPE_BLOB: + printf("[BLOB]"); + break; + default: + printf("?"); + break; + } +} diff --git a/src/transaction.c b/src/transaction.c new file mode 100644 index 0000000..5ade0d7 --- /dev/null +++ b/src/transaction.c @@ -0,0 +1,337 @@ +#include "../include/transaction.h" +#include +#include +#include + +void txn_manager_init(TransactionManager* manager, uint32_t capacity) { + manager->transactions = malloc(sizeof(Transaction) * capacity); + manager->capacity = capacity; + manager->count = 0; + manager->next_id = 1; // Start with txn_id = 1 + manager->enabled = false; + + // Initialize all transactions to idle state + for (uint32_t i = 0; i < capacity; i++) { + manager->transactions[i].id = 0; // 0 means unused slot + manager->transactions[i].state = TRANSACTION_IDLE; + manager->transactions[i].changes = NULL; + manager->transactions[i].change_count = 0; + } +} + +void 
txn_free_changes(RowChange* changes) { + RowChange* current = changes; + while (current != NULL) { + RowChange* next = current->next; + if (current->old_data) { + free(current->old_data); + } + free(current); + current = next; + } +} + +void txn_manager_free(TransactionManager* manager) { + if (!manager) return; + + // Free all transaction data + for (uint32_t i = 0; i < manager->capacity; i++) { + txn_free_changes(manager->transactions[i].changes); + } + + free(manager->transactions); + manager->transactions = NULL; + manager->capacity = 0; + manager->count = 0; +} + +bool txn_manager_enable(TransactionManager* manager) { + if (!manager) return false; + manager->enabled = true; + printf("Transaction support enabled.\n"); + return true; +} + +bool txn_manager_disable(TransactionManager* manager) { + if (!manager) return false; + + // Check if any active transactions exist + for (uint32_t i = 0; i < manager->capacity; i++) { + if (manager->transactions[i].id != 0 && + manager->transactions[i].state == TRANSACTION_ACTIVE) { + printf("Cannot disable transactions: active transactions exist.\n"); + return false; + } + } + + manager->enabled = false; + printf("Transaction support disabled.\n"); + return true; +} + +bool txn_manager_is_enabled(TransactionManager* manager) { + if (!manager) return false; + return manager->enabled; +} + +// Find an available transaction slot or returns -1 +static int find_available_slot(TransactionManager* manager) { + for (uint32_t i = 0; i < manager->capacity; i++) { + if (manager->transactions[i].id == 0) { + return i; + } + } + return -1; +} + +// Find transaction by ID, returns index or -1 if not found +static int find_transaction(TransactionManager* manager, uint32_t txn_id) { + for (uint32_t i = 0; i < manager->capacity; i++) { + if (manager->transactions[i].id == txn_id) { + return i; + } + } + return -1; +} + +uint32_t txn_begin(TransactionManager* manager) { + if (!manager || !manager->enabled) { + return 0; // 0 means invalid 
transaction + } + + // Check if we've reached capacity + if (manager->count >= manager->capacity) { + printf("Error: Maximum number of concurrent transactions reached.\n"); + return 0; + } + + int slot = find_available_slot(manager); + if (slot < 0) { + printf("Error: No available transaction slots.\n"); + return 0; + } + + uint32_t txn_id = manager->next_id++; + if (manager->next_id == 0) manager->next_id = 1; // Avoid 0 as it's invalid + + Transaction* txn = &manager->transactions[slot]; + txn->id = txn_id; + txn->state = TRANSACTION_ACTIVE; + txn->start_time = time(NULL); + txn->changes = NULL; + txn->change_count = 0; + + manager->count++; + + printf("Transaction %u started.\n", txn_id); + return txn_id; +} + +bool txn_commit(TransactionManager* manager, uint32_t txn_id) { + if (!manager || !manager->enabled || txn_id == 0) { + return false; + } + + int txn_idx = find_transaction(manager, txn_id); + if (txn_idx < 0) { + printf("Error: Transaction %u not found.\n", txn_id); + return false; + } + + Transaction* txn = &manager->transactions[txn_idx]; + if (txn->state != TRANSACTION_ACTIVE) { + printf("Error: Cannot commit transaction %u, not active.\n", txn_id); + return false; + } + + // Free any tracked changes as they're no longer needed + txn_free_changes(txn->changes); + txn->changes = NULL; + + // Mark as committed + txn->state = TRANSACTION_COMMITTED; + + printf("Transaction %u committed successfully.\n", txn_id); + + // Clean up the transaction + txn->id = 0; // Mark slot as available + txn->state = TRANSACTION_IDLE; + manager->count--; + + return true; +} + +bool txn_rollback(TransactionManager* manager, uint32_t txn_id) { + if (!manager || !manager->enabled || txn_id == 0) { + return false; + } + + int txn_idx = find_transaction(manager, txn_id); + if (txn_idx < 0) { + printf("Error: Transaction %u not found.\n", txn_id); + return false; + } + + Transaction* txn = &manager->transactions[txn_idx]; + if (txn->state != TRANSACTION_ACTIVE) { + printf("Error: 
Cannot rollback transaction %u, not active.\n", txn_id); + return false; + } + + // TODO: Apply rollback changes in reverse order + // This requires implementing a way to restore original data + // to the respective pages + + // For now, just print what would be rolled back + printf("Rolling back transaction %u (%u changes):\n", txn_id, txn->change_count); + + RowChange* change = txn->changes; + while (change != NULL) { + printf(" - Reverting change to key %u on page %u, cell %u\n", + change->key, change->page_num, change->cell_num); + change = change->next; + } + + // Free the change tracking data + txn_free_changes(txn->changes); + txn->changes = NULL; + + // Mark as aborted + txn->state = TRANSACTION_ABORTED; + + printf("Transaction %u rolled back.\n", txn_id); + + // Clean up the transaction + txn->id = 0; // Mark slot as available + txn->state = TRANSACTION_IDLE; + manager->count--; + + return true; +} + +bool txn_is_active(TransactionManager* manager, uint32_t txn_id) { + if (!manager || !manager->enabled || txn_id == 0) { + return false; + } + + int txn_idx = find_transaction(manager, txn_id); + if (txn_idx < 0) { + return false; + } + + return manager->transactions[txn_idx].state == TRANSACTION_ACTIVE; +} + +bool txn_record_change(TransactionManager* manager, + uint32_t txn_id, + uint32_t page_num, + uint32_t cell_num, + uint32_t key, + void* old_data, + uint32_t old_size) { + if (!manager || !manager->enabled || txn_id == 0 || !old_data) { + return false; + } + + int txn_idx = find_transaction(manager, txn_id); + if (txn_idx < 0) { + return false; + } + + Transaction* txn = &manager->transactions[txn_idx]; + if (txn->state != TRANSACTION_ACTIVE) { + return false; + } + + // Create new change record + RowChange* change = malloc(sizeof(RowChange)); + if (!change) { + return false; + } + + // Make a copy of the old data for potential rollback + void* data_copy = malloc(old_size); + if (!data_copy) { + free(change); + return false; + } + + memcpy(data_copy, 
old_data, old_size); + + // Set up the change record + change->page_num = page_num; + change->cell_num = cell_num; + change->key = key; + change->old_data = data_copy; + change->old_size = old_size; + change->next = txn->changes; // Add to front of list + + // Update transaction + txn->changes = change; + txn->change_count++; + + return true; +} + +void txn_print_status(TransactionManager* manager, uint32_t txn_id) { + if (!manager || txn_id == 0) { + printf("Invalid transaction.\n"); + return; + } + + int txn_idx = find_transaction(manager, txn_id); + if (txn_idx < 0) { + printf("Transaction %u not found.\n", txn_id); + return; + } + + Transaction* txn = &manager->transactions[txn_idx]; + + printf("Transaction %u: ", txn_id); + switch (txn->state) { + case TRANSACTION_IDLE: + printf("IDLE"); + break; + case TRANSACTION_ACTIVE: + printf("ACTIVE"); + break; + case TRANSACTION_COMMITTED: + printf("COMMITTED"); + break; + case TRANSACTION_ABORTED: + printf("ABORTED"); + break; + } + + printf(", Changes: %u\n", txn->change_count); + + // Convert start time to readable format + char time_buf[64]; + struct tm* tm_info = localtime(&txn->start_time); + strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S", tm_info); + + printf("Started: %s\n", time_buf); +} + +void txn_print_all(TransactionManager* manager) { + if (!manager) { + return; + } + + printf("Transaction Manager Status:\n"); + printf("Enabled: %s\n", manager->enabled ? 
"YES" : "NO"); + printf("Active transactions: %u/%u\n", manager->count, manager->capacity); + + bool found_active = false; + for (uint32_t i = 0; i < manager->capacity; i++) { + if (manager->transactions[i].id != 0) { + found_active = true; + printf("------------------------------------------\n"); + txn_print_status(manager, manager->transactions[i].id); + } + } + + if (!found_active) { + printf("No active transactions.\n"); + } +} \ No newline at end of file diff --git a/src/utils.c b/src/utils.c index 5ccb7f5..4f7a4e6 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,3 +1,5 @@ +#include +#include #include #include @@ -27,3 +29,23 @@ char* strcasestr(const char* haystack, const char* needle) { return NULL; } + +int count_commas(const char* str, size_t len) { + int count = 0; + for (size_t i = 0; i < len; i++) { + if (str[i] == ',') { + count++; + } + } + return count; +} + +char* my_strdup(const char* str) { + if (!str) return NULL; + size_t len = strlen(str) + 1; + char* new_str = malloc(len); + if (new_str) { + memcpy(new_str, str, len); + } + return new_str; +} \ No newline at end of file diff --git a/tests/insert_dummy_data.py b/tests/insert_dummy_data.py new file mode 100644 index 0000000..53c3509 --- /dev/null +++ b/tests/insert_dummy_data.py @@ -0,0 +1,40 @@ +import subprocess + +def run_script(commands, program="./bin/db-project"): + process = subprocess.Popen([program], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + try: + for command in commands: + process.stdin.write(command + "\n") + process.stdin.close() + raw_output = process.stdout.read() + process.stdout.close() + process.wait() + return raw_output.split("\n") + except BrokenPipeError: + raw_output = process.stdout.read() + process.stdout.close() + process.wait() + return raw_output.split("\n") + +def insert_dummy_data(): + commands = [ + "create database school", + "use database school", + "create table students (id INT, name STRING, father_name STRING, gpa 
FLOAT, age INT, gender STRING)", + "use table students", + "insert into students values (1, 'John Doe', 'Richard Roe', 3.5, 20, 'M')", + "insert into students values (2, 'Jane Smith', 'John Smith', 3.8, 22, 'F')", + "insert into students values (3, 'Alice Johnson', 'Robert Johnson', 3.2, 19, 'F')", + "insert into students values (4, 'Bob Brown', 'Michael Brown', 3.9, 21, 'M')", + "insert into students values (5, 'Charlie Davis', 'David Davis', 3.6, 23, 'M')", + ".exit" + ] + try: + output = run_script(commands) + for line in output: + print(line) + except Exception as e: + print(f"An error occurred: {e}") + +if __name__ == "__main__": + insert_dummy_data() diff --git a/test_db.py b/tests/test_db.py similarity index 100% rename from test_db.py rename to tests/test_db.py diff --git a/ui/frontEnd.py b/ui/frontEnd.py deleted file mode 100644 index e425db9..0000000 --- a/ui/frontEnd.py +++ /dev/null @@ -1,117 +0,0 @@ -import sys -from PyQt6.QtCore import Qt, QEasingCurve, QPropertyAnimation -from PyQt6.QtGui import QFont -from PyQt6.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, - QLabel, QPlainTextEdit, QPushButton, QTableWidget) - - -class DatabaseManager(QMainWindow): - def __init__(self): - super().__init__() - self.setWindowTitle("Lightweight Database Manager") - self.setGeometry(100, 100, 800, 600) - self.setStyleSheet(self.main_stylesheet()) - - self.conn = None - self.cursor = None - - self.init_ui() - - def init_ui(self): - # Central widget and layout - central_widget = QWidget() - self.setCentralWidget(central_widget) - main_layout = QVBoxLayout(central_widget) - - # Header - title_label = QLabel("Lightweight Database Manager") - title_label.setAlignment(Qt.AlignmentFlag.AlignCenter) - title_label.setStyleSheet("font-size: 24px; font-weight: bold; margin-bottom: 10px;") - main_layout.addWidget(title_label) - - # Command input section - main_layout.addWidget(QLabel("Enter SQL Commands:")) - self.command_input = QPlainTextEdit() 
- self.command_input.setStyleSheet("border: 2px solid #2A9D8F; border-radius: 5px; padding: 5px;") - main_layout.addWidget(self.command_input) - - # Execution controls - button_layout = QHBoxLayout() - execute_button = QPushButton("Execute Command") - execute_button.setStyleSheet(self.button_stylesheet()) - execute_button.clicked.connect(self.animate_button) - execute_button.clicked.connect(self.execute_command) - button_layout.addWidget(execute_button) - - clear_button = QPushButton("Clear Command") - clear_button.setStyleSheet(self.button_stylesheet()) - clear_button.clicked.connect(self.animate_button) - clear_button.clicked.connect(self.clear_command) - button_layout.addWidget(clear_button) - - main_layout.addLayout(button_layout) - - # Results display section - self.results_table = QTableWidget() - self.results_table.setStyleSheet("border: 2px solid #264653; border-radius: 5px; background-color: #E9C46A;") - main_layout.addWidget(self.results_table) - - # Status message - self.status_message = QLabel() - self.status_message.setStyleSheet("color: #F4A261; font-size: 16px;") - main_layout.addWidget(self.status_message) - - # Footer (status bar) - self.statusBar().showMessage("No database connected") - - def button_stylesheet(self): - return """ - QPushButton { - background-color: #2A9D8F; - color: white; - font-size: 16px; - padding: 5px 15px; - border-radius: 10px; - border: 2px solid #264653; - } - QPushButton:hover { - background-color: #E76F51; - } - QPushButton:pressed { - background-color: #264653; - } - """ - - def main_stylesheet(self): - return """ - QMainWindow { - background-color: #1D3557; - color: white; - } - QLabel { - color: white; - } - """ - - def animate_button(self): - sender = self.sender() - anim = QPropertyAnimation(sender, b"geometry") - anim.setDuration(150) - anim.setStartValue(sender.geometry()) - anim.setEndValue(sender.geometry().adjusted(-5, -5, 5, 5)) - anim.setEasingCurve(QEasingCurve.Type.OutBounce) - anim.start() - - def 
clear_command(self): - self.command_input.clear() - - def execute_command(self): - # Logic remains the same as before - pass - - -if __name__ == '__main__': - app = QApplication([]) - window = DatabaseManager() - window.show() - sys.exit(app.exec())