Rust Lessons

Learn Rust by staring at code.

Lesson 3: CAT

#include <windows.h>
#include <stdio.h>
#include <tchar.h>

#define BUF_SIZE 512 // (1) BUF_SIZE macro definition: preprocessor replaces with 
                     // literal 512 (0x200 in hex). Compiler allocates no memory;
                     // simply substitutes. Impacts stack buffer size in CatFile.
                     // 512 chosen as a common I/O buffer size.

static void CatFile(HANDLE hIn, HANDLE hOut);
static void ReportError(LPCTSTR msg, DWORD errCode, BOOL showErrMsg);

int _tmain(int argc, TCHAR* argv[]) {
    HANDLE hIn, hStdIn = GetStdHandle(STD_INPUT_HANDLE);
    HANDLE hStdOut = GetStdHandle(STD_OUTPUT_HANDLE);
    BOOL dashS = FALSE;
    int iFirstFile = 1;

    /* Flow Step 1a: Parse Options */
    for (int i = 1; i < argc; i++) {
        if (_tcscmp(argv[i], _T("-s")) == 0) {
            dashS = TRUE;
            iFirstFile++;
        }
        else if (argv[i][0] == _T('-')) {
            iFirstFile++;
        }
        else {
            break;
        }
    }

    /* Flow Step 2: Input Source Decision */
    if (iFirstFile >= argc) {
        CatFile(hStdIn, hStdOut);
        return 0;
    }

    /* Flow Step 3: File Processing Loop */
    for (int i = iFirstFile; i < argc; i++) {
        hIn = CreateFile(argv[i], GENERIC_READ, 0, NULL,
            OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);

        if (hIn == INVALID_HANDLE_VALUE) {
            if (!dashS) 
                ReportError(_T("File open error"), GetLastError(), TRUE);
            continue;
        }

        SetLastError(0); /* clear any stale error code so the post-copy check below is meaningful */
        CatFile(hIn, hStdOut);

        DWORD err = GetLastError();
        if (err != 0 && !dashS) {
            ReportError(_T("Processing error"), err, TRUE);
        }

        CloseHandle(hIn);
    }
    return 0;
}

static void CatFile(HANDLE hIn, HANDLE hOut) {
    BYTE buffer[BUF_SIZE];
    DWORD nRead, nWritten;

    while (ReadFile(hIn, buffer, BUF_SIZE, &nRead, NULL) && nRead > 0) {
        if (!WriteFile(hOut, buffer, nRead, &nWritten, NULL) 
            || nWritten != nRead) {
            break;
        }
    }
}

static void ReportError(LPCTSTR msg, DWORD errCode, BOOL showErrMsg) {
    _ftprintf(stderr, _T("ERROR: %s"), msg);

    if (showErrMsg) {
        LPVOID errMsg = NULL;
        if (FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
                FORMAT_MESSAGE_FROM_SYSTEM,
                NULL, errCode, 0,
                (LPTSTR)&errMsg, 0, NULL) != 0) {
            /* only print and free the buffer if FormatMessage actually allocated one */
            _ftprintf(stderr, _T(" (%s)"), (LPTSTR)errMsg);
            LocalFree(errMsg);
        }
    }
    _ftprintf(stderr, _T("\n"));
}
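
A note before the port: the Win32 version receives its arguments as TCHAR strings, which on a Unicode build are UTF-16 and can name files that are not valid UTF-8. Rust models the same reality with OsString, which is why the port below iterates env::args_os() instead of env::args() (the latter panics on arguments that are not valid Unicode). A minimal, stand-alone sketch of that difference (not part of the lesson program; the printed text is illustrative):

use std::env;

fn main() {
    // args_os() never panics: each item is an OsString in the platform's native
    // form (UTF-16-derived on Windows), so unusual file names survive intact.
    for arg in env::args_os().skip(1) {
        // to_string_lossy() is for display only; real work should keep the OsString.
        println!("got argument: {}", arg.to_string_lossy());
    }
}

The lesson program follows the same rule: arguments stay as OsString all the way to File::open, and to_string_lossy() appears only in error messages.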

Rust Implementation 🦀

use std::env; // (1) A `use` declaration only brings the `std::env` path into scope for name
              //     resolution; it links nothing by itself. The standard library is linked into
              //     the binary (statically by default) whether or not this line exists.

use std::ffi::OsString; // (2) `OsString` stores text in the platform's native form (UTF-16-derived
                        //     on Windows, arbitrary bytes on Unix), so file names and arguments that
                        //     are not valid UTF-8 survive intact. Its layout is resolved at compile
                        //     time like any other type.

use std::fs::File;    // (3) `File` wraps the OS file handle (a HANDLE on Windows, a file descriptor
                      //     on Unix). Each read/write goes through a system call (ReadFile/WriteFile
                      //     on Windows), so there is kernel-transition overhead compared with pure
                      //     in-process work.

use std::io::{self, Read, Write}; // (4) The `Read`/`Write` traits define the abstract I/O interfaces.
                             //     In this program they appear only as generic bounds, so dispatch is
                             //     static (monomorphized); trait objects with vtables would only arise
                             //     with `dyn Read`/`dyn Write`.
const BUF_SIZE: usize = 512; // (5) Unlike the C #define, this is a typed item evaluated at compile
                             //     time; each use is inlined as the constant 512. It sizes the stack
                             //     `buffer` allocated inside `cat_stream`.

fn main() {                 // (6) `main` is Rust's entry point; the actual OS entry point lives in the
                            //     Rust runtime's startup shim, which sets up the process and then calls
                            //     `main`. The compiler emits the usual prologue here (saving RBP,
                            //     adjusting RSP) so the frame can be torn down cleanly on return.

    let (dash_s, files) = parse_args();// (7) Destructuring the returned tuple moves the values out;
                                  //     moving the `Vec` copies only its pointer/length/capacity words,
                                  //     never the heap contents, and the `bool` will typically travel in
                                  //     a register.

    
    if files.is_empty() {        // (8) Branching instruction emitted: Conditional check of boolean `files.is_empty()` 
                              //      influencing program counter value to either skip the block or enter (conditional jump).
        // (9) Function call: arguments go in registers first on x86-64, spilling to the stack per the
        //     platform calling convention; `stdin()`/`stdout()` hand back handles to the process-wide
        //     standard streams. `cat_stream` returns an `io::Result`, which is #[must_use], so the
        //     error case is handled rather than silently dropped.
        if let Err(e) = cat_stream(&mut io::stdin(), &mut io::stdout(), dash_s) {
            if !dash_s {
                eprintln!("Error: {}", e);
            }
        }
        return;                // (10) An ordinary function epilogue (not panic unwinding): the stack
                               //      frame is torn down and control returns to the runtime startup
                               //      code; main's `()` return value needs no register at all.

    }

    for path in files {     // (11) Iterating a Vec by value uses the concrete `std::vec::IntoIter`
                            //      type; calls to next() are statically dispatched (and usually
                            //      inlined), so there is no vtable indirection per element.


        match File::open(&path) { // (12) `File::open` returns a `Result`; the enum's layout
                            //      (discriminant plus payload, often niche-optimized) is fixed at
                            //      compile time and branched on here.
            Ok(mut file) => {   // (13) Pattern matching: the discriminant is tested at runtime to pick
                            //      this arm. Binding `mut file` takes ownership of the handle; the
                            //      borrow checker enforces exclusive access at compile time.
                if let Err(e) = cat_stream(&mut file, &mut io::stdout(), dash_s) {  // (14) Nested Error Handling
                       //Runtime evaluates return value from cat_stream to detect possible failure paths (`io::Result` unpacking).
                    if !dash_s {    // (15) Boolean NOT involves inverting a bit (likely with XOR instruction at machine level); 
                          // branch based on  Zero flag or setting based on dash_s truthiness.

                        eprintln!("Error processing file: {}: {}", path.to_string_lossy(), e); // (16) String formatting incurs dynamic memory operations on heap for new strings constructed;
                              // System call involved (writes to stderr - likely file descriptor 2) inducing kernel context switch costs.


                    }
                }
            }
            Err(e) => {     // (17) Compiler ensures exhaustive pattern match of all `Result` variants to ensure safety & well-defined paths at runtime.
                if !dash_s {     // (18) This branch mirrors the check at (15); the two near-identical
                                 //      error-reporting arms may be merged by the optimizer (tail
                                 //      merging), or factored into a helper function in the source.

                    eprintln!("Error opening file: {}: {}", path.to_string_lossy(), e);// (19) Duplication - potential optimization by function extraction; Compiler might still
                              //   inline if optimization levels are high, incurring inlining overhead with a reduced code footprint trade-off.

                }
            }
        }
    }
}

fn parse_args() -> (bool, Vec<OsString>) {  // (20) Return type implications -  ABI dictates how `bool` (potentially byte/word aligned), 
                                             //    and heap-allocated `Vec`  (data pointer with size/capacity fields) is returned. This impacts how data structures are
                                         //           handled, either in dedicated registers or using address/value transmission on the stack frame.


    let mut dash_s = false;// (21) `dash_s` likely resides on the stack. Might be promoted
                         //   to register if highly used by loop or function to reduce memory accesses at runtime, improving perf.

    let mut files = vec![];// (22) `vec![]` expands to `Vec::new()`, which does NOT allocate; the first
                           //  heap allocation is deferred until the first `push`, after which capacity
                           //  grows geometrically to amortize reallocation cost.


    
    for arg in env::args_os().skip(1) {// (23) Iterator chain `.skip(1)` induces internal creation of a new adapter iterator without consuming 
                                      //  underlying `env::args_os`. Overhead negligible compared to OS API interaction; However stack frame usage per adapter is relevant.


        if arg == "-s" {          // (24)  String comparison `arg == "-s"` may be optimized to direct
                           //      pointer comparison if both operands are string literals allowing CPU's branch prediction to perform efficiently

            dash_s = true;          // (25) Direct boolean assignment sets internal bit value to 1 within `dash_s`. Change could lead
                         //  to cache line invalidations with CPU architecture effects if this location also shared across other threads

        } else if arg.to_str().map_or(false, |s| s.starts_with('-')) {  // (26) `map_or`: Branching logic generated by the compiler around the 
                        //  Optional type for `to_str`. Short circuiting of `starts_with('-')` evaluation minimizes redundant work improving hot-path perf.
            continue;           // (27) `continue` jumps straight back to the point where the iterator's
                                //      next() is called; an unconditional branch the CPU predicts well.

        } else {
            files.push(arg);      // (28) `push` may require heap reallocation. If the current allocation can hold `arg`, realloc avoided (perf boost). 
                            // Capacity preallocation strategies in `Vec` internals matter in cost amortization

        }
    }
    
    (dash_s, files)        // (29) Tuple returned – underlying representation possibly flattened by compiler. 
                            // Potential copy of elements (especially for files' pointer) onto a contiguous
                             //  block in a special stack/register-based temporary slot determined by ABI/compiler.  

}


fn cat_stream<R: Read, W: Write>(// (30) Generics:  Monomorphization by compiler for used `R` and `W` types
                               //at each invocation. Code bloat potential for multiple `R` and `W` combinations (vs virtual functions: runtime overhead)
    input: &mut R,          // (31) `&mut R`: at machine level a mutable reference is just a pointer to
                            //      the caller's data (at an ASLR-randomized address). Because R is a
                            //      generic parameter, calls through it are resolved statically; no
                            //      vtable is involved here.
    output: &mut W,
    suppress_errors: bool,  // (32) A plain bool passed by value; typically kept in a register or a
                            //      stack slot so the per-iteration checks below are cheap and
                            //      branch-prediction friendly.

) -> io::Result<()> {       // (33) The return type is part of the ABI: whether the Ok/Err payload
                            //      travels in registers or on the stack is fixed at compile time, and
                            //      every monomorphized instance of this generic function follows the
                            //      same convention.
    let mut buffer = [0u8; BUF_SIZE];  // (34) Array `buffer`: `512` contiguous bytes on stack; might span
                         //across multiple cache lines impacting performance if accesses not localized;

    
    loop {                  // (35) Loop implies conditional backward branch (`jmp` instruction at a lower level). Runtime check against the outcome
                        //   of `input.read`, altering control flow (PC manipulation via condition flags evaluation post reading ops).

        let bytes_read = match input.read(&mut buffer) {  // (36) Runtime branch –`match` compiles into conditional
                          //jump instructions (likely involving CMP at assembly). Code organization optimized based on enum tag (discriminant) of `Result`.

            Ok(0) => break,            // (37) `break` involves modifying PC to skip current loop. Jump instructions set for specific locations;  may hinder instruction-level parallelism
                           //with pipelining if branch prediction poor since target determined only upon branch reach
            Ok(n) => n,             // (38) Data moved directly onto the stack (`bytes_read`); compiler may potentially eliminate `bytes_read` variable altogether if further usage only
                          //occurs in a tail position (via optimization – e.g., using value instead of another load instruction), demonstrating register allocation benefits.


            Err(_) if suppress_errors => return Ok(()), // (39) Conditional error suppression: the
                        // compiler emits a test of `suppress_errors`; when set, we short-circuit with
                        // an early Ok return instead of propagating the error to the caller.

            Err(e) => return Err(e),    // (40) Error propagation. Deeply returning error information, involving 
                              //either copying it to a result struct/register (potential overhead depending on error value size, if it isn't just an integer enum discriminant) or wrapping existing error representation

        };

        let mut bytes_written = 0;      // (41) `bytes_written` initialization to zero translates directly into CPU instructions like `mov` to initialize memory location, consuming CPU cycles
                           //and potentially affecting instruction level parallelism (blocking a write-back until dependencies clear on the execution path).  

        while bytes_written < bytes_read {  //(42) Inner Loop –  Compiled into another conditional branch relying on comparing 
                           //memory location associated to `bytes_written` with `bytes_read` with outcomes impacting instruction pipeline predictability (similar considerations apply to all loops).

            let write_result = output.write(&buffer[bytes_written..bytes_read]);//(43) Slice (`buffer[...]`) calculated at runtime : Implies starting memory address
                                //      for slice start  by offsetting base address via  `bytes_written`, requiring additions potentially at machine code level
                              //    depending on compiler optimizations – impacting arithmetic pipelines  (but potentially minor given common sub-expression/propensity for reordering instructions)


            
            match write_result {            // (44) Duplicate pattern with potential optimization: If compiler detects no external side effects, a single match construct encompassing lines (36) 
                                       //and (44) might be achievable, improving branch prediction (fewer instructions by factoring common branching pathways – a gain from enhanced code locality).  


                Ok(0) => return Err(io::Error::new(io::ErrorKind::WriteZero,
                    "write returned 0 bytes")), // guard: without this, a sink that accepts no bytes would spin the inner loop forever.

                Ok(n) => bytes_written += n,// (45) `+=` operator – underlying read-modify-write operation (potential non-atomicity on hardware) incurring memory access and CPU operation latency, affecting execution path.


                Err(_) if suppress_errors => return Ok(()),// (46) Error suppression again; depending on the
                            // architecture the branch may compile to a conditional move (CMOV) or a
                            // well-predicted jump.

                Err(e) => return Err(e),  //(47)  Same error semantics as line 40 -- but at deeper level -compiler possibly performs stack-level optimizations 
                       //to collapse the redundant behavior (epilogue/error data copies) depending on knowledge on call paths via static analysis and optimization scope constraints..


            }
        }
    }
    
    Ok(())              // (48) The last expression is the function's return value. `()` is zero-sized,
                        //      so only the `Ok` discriminant of the `io::Result` has to be materialized
                        //      for the caller, per the calling convention; often a single register write.

}
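
Because cat_stream is generic over Read and Write (comment (30)), it can be exercised without touching the file system: any in-memory types implementing those traits will do. A minimal test sketch under that assumption (the module and test names are illustrative, and it assumes cat_stream is visible from the same crate):

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    #[test]
    fn copies_bytes_verbatim() {
        // Cursor<Vec<u8>> implements Read and Vec<u8> implements Write, so
        // cat_stream is monomorphized here for in-memory buffers instead of files.
        let mut input = Cursor::new(b"hello, cat".to_vec());
        let mut output: Vec<u8> = Vec::new();
        cat_stream(&mut input, &mut output, false).unwrap();
        assert_eq!(output, b"hello, cat".to_vec());
    }
}

Run the tests with cargo test; the program itself runs as cargo run -- file1.txt file2.txt (add -s to suppress error reports, mirroring the C version's dashS flag).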

Exercise: Implement in C++