Compiler Design Lab work | 5th Sem | CSE

 

Date: 06/08/2024

EXPERIMENT NO. – 1

 

AIM: Write a C++ program that counts the distinct tokens of each type (keywords, identifiers, literals, operators and special symbols) in a given program, simulating tokenisation in the lexical analysis phase of a compiler.

 

THEORY:

Tokenization in the lexical phase of compiler design is the process of breaking down the source code into fundamental units called tokens. These tokens represent the smallest elements of the code with meaningful semantics, such as keywords, identifiers, literals, operators, and special symbols. During this phase, the lexer (or lexical analyzer) scans the input source code sequentially, identifies these tokens based on predefined patterns, and categorizes them accordingly. This step is crucial as it transforms raw code into a structured format that can be further analyzed and processed by the subsequent phases of the compiler, such as syntax analysis and semantic analysis.

 

CODE:

#include <iostream>

#include <sstream>

#include <string>

#include <unordered_set>

#include <vector>

#include <cctype>

#include <algorithm>

 

using namespace std;

 

/**
 * Reports whether `token` is one of the C++ keywords recognised by this
 * tokenizer.
 *
 * The comparison is exact and case-sensitive; only the keywords listed in the
 * table below are recognised.
 *
 * @param token Candidate token text.
 * @return true if `token` is in the keyword table, false otherwise.
 */
bool isKeyword(const std::string &token)
{
    // Function-local static: the table is built once on the first call instead
    // of being re-allocated and scanned linearly for every token.
    static const std::unordered_set<std::string> keywords = {
        "int", "float", "double", "char", "void", "return", "if", "else", "for", "while", "do", "switch", "case", "default", "break", "continue", "class", "public", "private", "protected", "static", "const", "typedef", "namespace", "using", "template", "try", "catch", "throw", "virtual"};

    return keywords.count(token) != 0;
}

 

/**
 * Reports whether `token` is one of the operators recognised by this
 * tokenizer.
 *
 * The whole token must match an entry exactly, so "+" and "++" are operators
 * but "+++" is not.
 *
 * @param token Candidate token text.
 * @return true if `token` is in the operator table, false otherwise.
 */
bool isOperator(const std::string &token)
{
    // Function-local static: built once on the first call rather than on
    // every lookup.
    static const std::unordered_set<std::string> operators = {
        "+", "-", "*", "/", "%", "++", "--", "==", "!=", "<", ">", "<=", ">=",
        "&&", "||", "!", "=", "+=", "-=", "*=", "/=", "%=", ">>", "<<", "&", "|", "^", "~"};

    return operators.count(token) != 0;
}

 

/**
 * Reports whether `token` is one of the special symbols (brackets,
 * separators, "->" and "::") recognised by this tokenizer.
 *
 * @param token Candidate token text; must match a table entry exactly.
 * @return true if `token` is in the special-symbol table, false otherwise.
 */
bool isSpecialSymbol(const std::string &token)
{
    // Function-local static: built once on the first call rather than on
    // every lookup.
    static const std::unordered_set<std::string> specialSymbols = {
        "{", "}", "(", ")", ";", ",", "[", "]", "->", "::"};

    return specialSymbols.count(token) != 0;
}

 

int main()

{

    unordered_set<string> identifiers;

    unordered_set<string> operators;

    unordered_set<string> specialSymbols;

    unordered_set<string> keywords;

    unordered_set<string> literals;

 

    string input;

    stringstream ss;

 

    cout << "Enter the C++ program (end input with two consecutive Enter presses):" << endl;

 

    string line;

    bool prevLineEmpty = false;

 

    while (getline(cin, line))

    {

        if (line.empty())

        {

            if (prevLineEmpty)

            {

                break;

            }

            prevLineEmpty = true;

        }

        else

        {

            prevLineEmpty = false;

            ss << line << '\n';

        }

    }

 

    auto tokenize = [](const string &str)

    {

        vector<string> tokens;

        string token;

        bool inString = false;

 

        for (char ch : str)

        {

            if (ch == '"')

            {

                if (inString)

                {

                    token += ch;

                    tokens.push_back(token);

                    token.clear();

                }

                else

                {

                    if (!token.empty())

                    {

                        tokens.push_back(token);

                        token.clear();

                    }

                    inString = true;

                    token += ch;

                }

            }

            else if (isspace(ch) || ch == ';' || ch == ',' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '.' || ch == '-' || ch == '+' || ch == '*' || ch == '/' || ch == '%' || ch == '=' || ch == '!' || ch == '<' || ch == '>' || ch == '&' || ch == '|' || ch == '^' || ch == '~')

            {

                if (inString)

                {

                    token += ch;

                }

                else

                {

                    if (!token.empty())

                    {

                        tokens.push_back(token);

                        token.clear();

                    }

                    if (ch != ' ')

                    {

                        tokens.push_back(string(1, ch));

                    }

                }

            }

            else

            {

                token += ch;

            }

        }

        if (!token.empty())

        {

            tokens.push_back(token);

        }

        return tokens;

    };

 

    stringstream inputStream(ss.str());

    string lineContent;

    while (getline(inputStream, lineContent))

    {

        vector<string> tokens = tokenize(lineContent);

        for (const string &token : tokens)

        {

            if (isKeyword(token))

            {

                keywords.insert(token);

            }

            else if (isdigit(token[0]) || (token[0] == '"' && token.back() == '"'))

            {

                literals.insert(token);

            }

            else if (isOperator(token))

            {

                operators.insert(token);

            }

            else if (isSpecialSymbol(token))

            {

                specialSymbols.insert(token);

            }

            else

            {

                identifiers.insert(token);

            }

        }

    }

 

    cout << "Identifiers (" << identifiers.size() << " distinct):" << endl;

    for (const string &id : identifiers)

    {

        cout << id << endl;

    }

 

    cout << "\nOperators (" << operators.size() << " distinct):" << endl;

    for (const string &op : operators)

    {

        cout << op << endl;

    }

 

    cout << "\nSpecial Symbols (" << specialSymbols.size() << " distinct):" << endl;

    for (const string &sym : specialSymbols)

    {

        cout << sym << endl;

    }

 

    cout << "\nKeywords (" << keywords.size() << " distinct):" << endl;

    for (const string &kw : keywords)

    {

        cout << kw << endl;

    }

 

    cout << "\nLiterals (" << literals.size() << " distinct):" << endl;

    for (const string &lit : literals)

    {

        cout << lit << endl;

    }

 

    return 0;

}

 

OUTPUT:








Date: 13/08/2024

EXPERIMENT NO. – 2

 

AIM: Write a LEX program that counts the distinct tokens of each type (keywords, identifiers, literals, operators and special symbols) in a given program, simulating tokenisation in the lexical analysis phase of a compiler.

 

SOFTWARE USED : FLEX 

 

THEORY:

LEX is a tool that generates lexical analyzers: given a set of patterns with associated actions, it produces a scanner that reads input text and classifies tokens by matching those patterns. Lexical analysis is the first step of compiling or interpreting a program, because it breaks the input text into manageable components for further analysis.

 

Structure of a LEX Program:

%{

    // Definitions Section: Include headers and macros

%}

 

%%

    // Rules Section: Define patterns and actions

%%

 

    // User Code Section: Implement main function and additional logic

 

Workflow:

  1. Write a LEX File: Define patterns and actions in the ‘.l’ file.
  2. Generate C Source File: Run ‘flex filename.l’ to produce ‘lex.yy.c’.
  3. Compile the C File: Run ‘gcc lex.yy.c’ to create the executable (‘a.exe’ on Windows, ‘a.out’ on Linux/macOS).
  4. Run the Lexer: Execute ‘./a.exe’ (or ‘./a.out’) to process input and produce token output.

 

CODE:

%{

#include <stdio.h>

#include <string.h>

#include <stdlib.h>

 

/* Capacity of each per-category token table declared below. */
#define MAX_TOKENS 100

 

/* One table per token category. Entries are heap copies (strdup) of the
   matched text, stored by addToken; each distinct token appears once. */
char *identifiers[MAX_TOKENS];

char *operators[MAX_TOKENS];

char *specialSymbols[MAX_TOKENS];

char *keywords[MAX_TOKENS];

char *literals[MAX_TOKENS];

 

/* Number of entries currently stored in the corresponding table above. */
int idCount = 0, opCount = 0, ssCount = 0, kwCount = 0, litCount = 0;

 

/*
 * Linear search: returns 1 if any of the first `count` strings in `list`
 * compares equal (strcmp) to `token`, otherwise 0.
 */
int exists(char **list, int count, char *token) {
    int idx = 0;

    while (idx < count) {
        if (!strcmp(list[idx], token)) {
            return 1;
        }
        ++idx;
    }
    return 0;
}

 

void addToken(char **list, int *count, char *token) {

    if (!exists(list, *count, token)) {

        list[*count] = strdup(token);

        (*count)++;

    }

}

 

%}

 

%option noyywrap

 

%%

"int"|"float"|"double"|"char"|"void"|"return"|"if"|"else"|"for"|"while"|"do"|"switch"|"case"|"default"|"break"|"continue"|"class"|"public"|"private"|"protected"|"static"|"const"|"typedef"|"namespace"|"using"|"template"|"try"|"catch"|"throw"|"virtual" {
    /* Keywords. This rule precedes the identifier rule so that, when both
       match the same text, the keyword rule is chosen; a longer word such as
       "integer" still goes to the identifier rule (longest match wins). */
    addToken(keywords, &kwCount, yytext);
}

[0-9]+(\.[0-9]+)?|\"[^\"\n]*\" {
    /* Literals: integers, decimals, and double-quoted strings on one line.
       The string pattern must not be wrapped in flex quotes: inside "..."
       flex takes '[', '^', ']' and '*' literally, so a quoted version would
       only match the text "[^"]*" itself and never a real string literal. */
    addToken(literals, &litCount, yytext);
}

"+"|"-"|"*"|"/"|"%"|"++"|"--"|"=="|"!="|"<"|">"|"<="|">="|"&&"|"||"|"!"|"="|"+="|"-="|"*="|"/="|"%="|">>"|"<<"|"&"|"|"|"^"|"~" {
    /* Operators. */
    addToken(operators, &opCount, yytext);
}

"{"|"}"|"("|")"|";"|","|"["|"]"|"->"|"::" {
    /* Special symbols. "->" is longer than the operator "-", so it is
       matched here rather than as "-" followed by ">". */
    addToken(specialSymbols, &ssCount, yytext);
}

[a-zA-Z_][a-zA-Z0-9_]* {
    /* Identifiers: a letter or underscore followed by letters, digits or
       underscores. */
    addToken(identifiers, &idCount, yytext);
}

[ \t\n]       { /* skip whitespace */ }

.             { /* ignore any other character */ }

%%

 

int main() {

    printf("Enter the C++ code (end input by pressing Enter twice):\n");

   

    char input[1024];

    int emptyLineCount = 0;

 

    while (fgets(input, sizeof(input), stdin)) {

        if (strcmp(input, "\n") == 0) {

            emptyLineCount++;

            if (emptyLineCount == 2) {

                break;

            }

        } else {

            emptyLineCount = 0;

            YY_BUFFER_STATE buffer = yy_scan_string(input);

            yylex();

            yy_delete_buffer(buffer);

        }

    }

 

    printf("Identifiers (%d distinct):\n", idCount);

    for (int i = 0; i < idCount; i++) {

        printf("%s\n", identifiers[i]);

    }

 

    printf("\nOperators (%d distinct):\n", opCount);

    for (int i = 0; i < opCount; i++) {

        printf("%s\n", operators[i]);

    }

 

    printf("\nSpecial Symbols (%d distinct):\n", ssCount);

    for (int i = 0; i < ssCount; i++) {

        printf("%s\n", specialSymbols[i]);

    }

 

    printf("\nKeywords (%d distinct):\n", kwCount);

    for (int i = 0; i < kwCount; i++) {

        printf("%s\n", keywords[i]);

    }

 

    printf("\nLiterals (%d distinct):\n", litCount);

    for (int i = 0; i < litCount; i++) {

        printf("%s\n", literals[i]);

    }

 

    return 0;

}


OUTPUT:








Comments

Popular Posts