- commit
- 3f80acc
- parent
- 640864f
- author
- xplshn
- date
- 2025-09-10 01:45:44 +0000 UTC
I ACCIDENTALLY MADE MY OWN PROGRAMMING LANGUAGE. Signed-off-by: xplshn <[email protected]>
M
Makefile
+2,
-2
1@@ -39,7 +39,7 @@ $(GTEST):
2
3 clean:
4 @echo "Cleaning up..."
5- @rm -f $(OUT) $(GTEST) ./a.out .test_results.json
6+ @rm -f $(OUT) $(GTEST) ./gbc ./cmd/gtest/gtest ./a.out ./.test_results.json
7
8 ARCH := $(shell uname -m)
9 OS := $(shell uname -s)
10@@ -62,7 +62,7 @@ endef
11 test: all $(GTEST)
12 @echo "Running tests..."
13 @files=$$( $(call filter_files,tests/*.b*,tests) ); \
14- ./cmd/$(GTEST)/$(GTEST) --test-files="$$files" --target-args="$(GBCFLAGS) $(LIBB)" -v
15+ ./cmd/$(GTEST)/$(GTEST) --test-files="$$files" --target-args="$(GBCFLAGS) $(LIBB)" -v --ignore-lines="addresses"
16
17 examples: all $(GTEST)
18 @echo "Running examples..."
+7,
-4
1@@ -66,7 +66,7 @@ This compiler is a project aiming to make a valid B compiler, with _optional_ sy
2 type Warn about type mismatches in expressions and assignments. |x|
3 u-esc Warn on unrecognized character escape sequences. |x|
4 unreachable-code Warn about code that will never be executed. |x|
5-]~/Documents/TrulyMine/gbc@
6+]~/Documents/TrulyMine/gbc@
7 ```
8
9 ### Progress Report:
10@@ -74,7 +74,10 @@ This compiler is a project aiming to make a valid B compiler, with _optional_ sy
11 - Capable of compiling all examples. Producing the same output as the reference B compiler, against the same STDIN and argument inputs.
12 - Etc, these are just the most impressive examples
13 - I added a completely opt-in type system. It uses type-first declarations like C, and uses the Go type names. (It can also be used with strict B via `-std=B -Ftyped`; the syntax is backwards compatible. It's so reliable it comes enabled by default.)
14-- `gbc` will warn about poorly written code. TODO: Convert these warnings into annotations that offer suggestions.
15+- `gbc`'s warnings warn against common errors, poor decisions, etc
16+- Directives are supported
17+- Meta-programming: W.I.P
18+- Borrow-checking: Working on that!!! Will probably be the last feature of GBC once the most essential stuff is addressed
19 - Portable and with multiple backends:
20 - QBE (default, via modernc.org/libQBE, a pure Go version of QBE)
21 - LLVM (via `llc`)
22@@ -99,8 +102,8 @@ The project is currently in its infancy, and the long-term goals are very ambiti
23 > 1. ~~Support the "extrn" keyword, as well as inline assembly~~
24 > 2. ~~Use the same warning & error messages [tsoding/b](https://github.com/tsoding/b)~~ / our warnings n errors are much better
25 > 3. ~~Be able to pass the IR tests of [tsoding/b](https://github.com/tsoding/b)~~
26-> 4. A gameboy color target once _all_ examples can be compiled and work as expected
27->
28+> 4. A gameboy color target once _all_ examples can be compiled and work as expected (WIP)
29+>
30 > ###### (iii) Packages / Modules inspired by Go
31 > * ¿.. Namespaces based on .mod file ..?
32 > * Implement a way to import/export symbols from different .B files, in different namespaces
+39,
-23
1@@ -22,7 +22,7 @@ import (
2 func main() {
3 app := cli.NewApp("gbc")
4 app.Synopsis = "[options] <input.b> ..."
5- app.Description = "A compiler for the B programming language and its extensions, written in Go."
6+ app.Description = "A compiler for the B programming language with modern extensions. Like stepping into a time machine, but with better error messages."
7 app.Authors = []string{"xplshn"}
8 app.Repository = "<https://github.com/xplshn/gbc>"
9 app.Since = 2025
10@@ -36,11 +36,13 @@ func main() {
11 userIncludePaths []string
12 libRequests []string
13 pedantic bool
14+ dumpIR bool
15 )
16
17 fs := app.FlagSet
18 fs.String(&outFile, "output", "o", "a.out", "Place the output into <file>.", "file")
19 fs.String(&target, "target", "t", "qbe", "Set the backend and target ABI.", "backend/target")
20+ fs.Bool(&dumpIR, "dump-ir", "d", false, "Dump the intermediate representation and exit.")
21 fs.List(&userIncludePaths, "include", "I", []string{}, "Add a directory to the include path.", "path")
22 fs.List(&linkerArgs, "linker-arg", "L", []string{}, "Pass an argument to the linker.", "arg")
23 fs.List(&compilerArgs, "compiler-arg", "C", []string{}, "Pass a compiler-specific argument (e.g., -C linker_args='-s').", "arg")
24@@ -51,14 +53,14 @@ func main() {
25 cfg := config.NewConfig()
26 warningFlags, featureFlags := cfg.SetupFlagGroups(fs)
27
28- // Actual compilation pipeline
29+ // Main compilation pipeline
30 app.Action = func(inputFiles []string) error {
31- // Handle pedantic flag first, as it can affect other settings.
32+ // Pedantic flag affects everything else
33 if pedantic {
34 cfg.SetWarning(config.WarnPedantic, true)
35 }
36
37- // Apply warning flag updates to config
38+ // Apply warning flags
39 for i, entry := range warningFlags {
40 if entry.Enabled != nil && *entry.Enabled {
41 cfg.SetWarning(config.Warning(i), true)
42@@ -68,7 +70,7 @@ func main() {
43 }
44 }
45
46- // Apply feature flag updates to config
47+ // Apply feature flags
48 for i, entry := range featureFlags {
49 if entry.Enabled != nil && *entry.Enabled {
50 cfg.SetFeature(config.Feature(i), true)
51@@ -83,15 +85,15 @@ func main() {
52 util.Error(token.Token{}, err.Error())
53 }
54
55- // Set target, defaulting to the host if not specified
56+ // Set target architecture
57 cfg.SetTarget(runtime.GOOS, runtime.GOARCH, target)
58
59- // Populate config from parsed command-line flags
60+ // Copy over command line settings
61 cfg.LinkerArgs = append(cfg.LinkerArgs, linkerArgs...)
62 cfg.LibRequests = append(cfg.LibRequests, libRequests...)
63 cfg.UserIncludePaths = append(cfg.UserIncludePaths, userIncludePaths...)
64
65- // Process compiler-specific arguments (-C)
66+ // Handle compiler args (-C)
67 for _, carg := range compilerArgs {
68 if parts := strings.SplitN(carg, "=", 2); len(parts) == 2 && parts[0] == "linker_args" {
69 parsedArgs, err := config.ParseCLIString(parts[1])
70@@ -102,12 +104,12 @@ func main() {
71 }
72 }
73
74- // PASS 1: Tokenize and parse initial files to process directives.
75+ // First pass: scan for directives
76 fmt.Println("----------------------")
77 records, allTokens := readAndTokenizeFiles(inputFiles, cfg)
78 util.SetSourceFiles(records)
79 p := parser.NewParser(allTokens, cfg)
80- p.Parse() // populates cfg with directive info
81+ p.Parse() // picks up directives
82
83 // Now that all directives are processed, determine the final list of source files.
84 finalInputFiles := processInputFiles(inputFiles, cfg)
85@@ -115,7 +117,7 @@ func main() {
86 util.Error(token.Token{}, "no input files specified.")
87 }
88
89- // PASS 2: Re-tokenize and parse the complete set of files for compilation.
90+ // Second pass: compile everything
91 isTyped := cfg.IsFeatureEnabled(config.FeatTyped)
92 fmt.Printf("Tokenizing %d source file(s) (Typed Pass: %v)...\n", len(finalInputFiles), isTyped)
93 fullRecords, fullTokens := readAndTokenizeFiles(finalInputFiles, cfg)
94@@ -125,33 +127,45 @@ func main() {
95 fullParser := parser.NewParser(fullTokens, cfg)
96 astRoot := fullParser.Parse()
97
98- fmt.Println("Constant folding...")
99+ fmt.Println("Folding constants...")
100 astRoot = ast.FoldConstants(astRoot)
101
102- if cfg.IsFeatureEnabled(config.FeatTyped) { // Re-check after directives
103+ if cfg.IsFeatureEnabled(config.FeatTyped) { // recheck after directive processing
104 fmt.Println("Type checking...")
105 tc := typeChecker.NewTypeChecker(cfg)
106 tc.Check(astRoot)
107 }
108
109- fmt.Println("Generating backend-agnostic IR...")
110+ fmt.Println("Creating intermediate representation...")
111 cg := codegen.NewContext(cfg)
112 irProg, inlineAsm := cg.GenerateIR(astRoot)
113
114- fmt.Printf("Generating target code with '%s' backend...\n", cfg.BackendName)
115+ // Handle --dump-ir/-d flag
116+ if dumpIR {
117+ fmt.Printf("Dumping IR for '%s' backend...\n", cfg.BackendName)
118+ backend := selectBackend(cfg.BackendName)
119+ irText, err := backend.GenerateIR(irProg, cfg)
120+ if err != nil {
121+ util.Error(token.Token{}, "backend IR generation failed: %v", err)
122+ }
123+ fmt.Print(irText)
124+ return nil
125+ }
126+
127+ fmt.Printf("Generating code with '%s' backend...\n", cfg.BackendName)
128 backend := selectBackend(cfg.BackendName)
129 backendOutput, err := backend.Generate(irProg, cfg)
130 if err != nil {
131 util.Error(token.Token{}, "backend code generation failed: %v", err)
132 }
133
134- fmt.Printf("Assembling and linking to create '%s'...\n", outFile)
135+ fmt.Printf("Linking to create '%s'...\n", outFile)
136 if err := assembleAndLink(outFile, backendOutput.String(), inlineAsm, cfg.LinkerArgs); err != nil {
137 util.Error(token.Token{}, "assembler/linker failed: %v", err)
138 }
139
140 fmt.Println("----------------------")
141- fmt.Println("Compilation successful!")
142+ fmt.Println("Done!")
143 return nil
144 }
145
146@@ -190,8 +204,10 @@ func processInputFiles(args []string, cfg *config.Config) []string {
147
148 func selectBackend(name string) codegen.Backend {
149 switch name {
150- case "qbe": return codegen.NewQBEBackend()
151- case "llvm": return codegen.NewLLVMBackend()
152+ case "qbe":
153+ return codegen.NewQBEBackend()
154+ case "llvm":
155+ return codegen.NewLLVMBackend()
156 default:
157 util.Error(token.Token{}, "unsupported backend '%s'", name)
158 return nil
159@@ -237,10 +253,10 @@ func assembleAndLink(outFile, mainAsm, inlineAsm string, linkerArgs []string) er
160 }
161 mainAsmFile.Close()
162
163- // TODO: We want PIE support
164- // - Fix LLVM backend to achieve that
165- // - Our QBE backend seems to have some issues with PIE as well, but only two cases fail when doing `make examples`
166- // We should, by default, use `-static-pie`
167+ // PIE support needs work:
168+ // - LLVM backend has issues
169+ // - QBE backend mostly works but fails on a couple examples
170+ // Should default to `-static-pie` eventually
171 ccArgs := []string{"-no-pie", "-o", outFile, mainAsmFile.Name()}
172 if inlineAsm != "" {
173 inlineAsmFile, err := os.CreateTemp("", "gbc-inline-*.s")
+9,
-10
1@@ -34,9 +34,9 @@ type Execution struct {
2 }
3
4 type TestRun struct {
5- Name string `json:"name"`
6- Args []string `json:"args,omitempty"`
7- Input string `json:"input,omitempty"`
8+ Name string `json:"name"`
9+ Args []string `json:"args,omitempty"`
10+ Input string `json:"input,omitempty"`
11 Result Execution `json:"result"`
12 }
13
14@@ -109,7 +109,7 @@ func main() {
15 handleRunTestSuite(tempDir)
16 }
17
18-// setupInterruptHandler is used to clean up on CTRL+C
19+// Cleanup on Ctrl+C
20 func setupInterruptHandler(tempDir string) {
21 c := make(chan os.Signal, 1)
22 signal.Notify(c, os.Interrupt)
23@@ -129,7 +129,7 @@ func getJSONPath(sourceFile string) string {
24 return filepath.Join(filepath.Dir(sourceFile), jsonFileName)
25 }
26
27-// hashFile computes the xxhash of a file's content
28+// Fast hash of file contents
29 func hashFile(path string) (string, error) {
30 f, err := os.Open(path)
31 if err != nil {
32@@ -191,7 +191,7 @@ func handleRunTestSuite(tempDir string) {
33 return
34 }
35
36- // Load previous results for caching reference compiler output
37+ // Check if there's cached results to speed things up
38 previousResults := make(TestSuiteResults)
39 outputFile := *outputJSON
40 if *jsonDir != "" {
41@@ -481,7 +481,7 @@ func compareRuntimeResults(file string, refResult, targetResult *TargetResult) *
42 }
43 }
44
45-// executeCommand runs a command with a timeout and captures its output, optionally piping data to stdin
46+// Run a command with timeout, capturing output and optionally feeding stdin
47 func executeCommand(ctx context.Context, command string, stdinData string, args ...string) Execution {
48 startTime := time.Now()
49 cmd := exec.CommandContext(ctx, command, args...)
50@@ -539,8 +539,7 @@ func compileAndRun(compiler string, compilerArgs []string, sourceFile, tempDir,
51 return &TargetResult{Compile: compileResult}, fmt.Errorf("compilation succeeded but binary was not created at %s", binaryPath)
52 }
53
54- // Probe to see if the binary waits for stdin by running it with a very short timeout
55- // If it times out, it's likely waiting for input
56+ // Quick test: does this program expect stdin?
57 probeCtx, probeCancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
58 defer probeCancel()
59 probeResult := executeCommand(probeCtx, binaryPath, "")
60@@ -631,7 +630,7 @@ func compileAndRun(compiler string, compilerArgs []string, sourceFile, tempDir,
61 return &TargetResult{Compile: compileResult, Runs: runResults, BinaryPath: binaryPath}, nil
62 }
63
64-// filterOutput removes lines containing any of the given substrings
65+// Remove lines containing any of these substrings
66 func filterOutput(output string, ignoredSubstrings []string) string {
67 if len(ignoredSubstrings) == 0 || output == "" {
68 return output
+629,
-0
1@@ -0,0 +1,629 @@
2+# The Bx Language Specification
3+
4+## Introduction
5+
6+Bx is a modern extension of the B programming language, preserving the elegant simplicity of Ken Thompson and Dennis Ritchie's original design while adding essential features for contemporary systems programming. Unlike B, Bx introduces an optional, backward-compatible type system, structured data types, floating-point arithmetic, and modern syntax conventions.
7+
8+Bx maintains full backward compatibility with B programs while offering opt-in enhancements through feature flags.
9+
10+## Language Philosophy
11+
12+Bx follows these core principles:
13+
14+1. **Backward Compatibility**: All valid B programs remain valid in Bx
15+2. **Optional Enhancement**: Modern features are opt-in via feature flags
16+3. **Simplicity**: Minimal syntax with maximum expressiveness
17+4. **Systems Programming**: Direct memory manipulation and efficient compilation
18+
19+## Type System
20+
21+### Overview
22+
23+The Bx type system is optional and backward-compatible. When the `typed` feature is disabled, Bx behaves exactly like B with untyped word values. When enabled, Bx provides static type checking while preserving B's flexibility.
24+
25+### Primitive Types
26+
27+Bx supports a comprehensive set of primitive types:
28+
29+```bx
30+// Integer types
31+int // Platform word size (32 or 64 bits)
32+uint // Unsigned platform word size
33+int8 // 8-bit signed integer
34+uint8 // 8-bit unsigned integer
35+int16 // 16-bit signed integer
36+uint16 // 16-bit unsigned integer
37+int32 // 32-bit signed integer
38+uint32 // 32-bit unsigned integer
39+int64 // 64-bit signed integer
40+uint64 // 64-bit unsigned integer
41+byte // Alias for uint8
42+
43+// Floating-point types (requires float feature)
44+float // Platform-native floating point
45+float32 // 32-bit IEEE 754 floating point
46+float64 // 64-bit IEEE 754 floating point
47+
48+// Other types
49+bool // Boolean (true/false)
50+string // String type, alias to byte*
51+void // Absence of value
52+any // Type that matches anything
53+```
54+
55+### Type Declarations
56+
57+Variables can be declared with explicit types:
58+
59+```bx
60+int32 counter = 0;
61+float64 pi = 3.141592653589793;
62+bool is_valid = true;
63+string name = "Bx Language";
64+```
65+
66+### Pointer Types
67+
68+Pointers are declared using the `*` suffix:
69+
70+```bx
71+int *ptr; // Pointer to int
72+byte *buffer; // Pointer to byte
73+float32 *matrix; // Pointer to float32
74+```
75+
76+Pointer arithmetic follows C conventions:
77+
78+```bx
79+int arr[10];
80+int *p = arr;
81+p = p + 1; // Points to arr[1]
82+*p = 42; // Sets arr[1] = 42
83+```
84+
85+### Array Types
86+
87+Arrays can be declared in two forms:
88+
89+```bx
90+// Fixed-size arrays
91+int numbers[100]; // 100 integers
92+byte buffer[1024]; // 1024 bytes
93+
94+// Dynamic arrays (slice notation)
95+[]int dynamic_array; // Pointer to int with length semantics
96+[]byte text_buffer; // Pointer to bytes
97+```
98+
99+Fixed arrays are allocated on the stack, while dynamic arrays typically point to heap-allocated memory.
100+
101+## Memory Layout and Alignment
102+
103+### Word Size and Alignment
104+
105+Bx follows platform-specific alignment requirements:
106+
107+| Architecture | Word Size | Stack Alignment | Pointer Size |
108+|-------------|-----------|-----------------|--------------|
109+| x86_64 | 8 bytes | 16 bytes | 8 bytes |
110+| i386 | 4 bytes | 8 bytes | 4 bytes |
111+| ARM64 | 8 bytes | 16 bytes | 8 bytes |
112+| ARM | 4 bytes | 8 bytes | 4 bytes |
113+| RISC-V 64 | 8 bytes | 16 bytes | 8 bytes |
114+
115+### Structure Layout
116+
117+Structures are laid out sequentially in memory with appropriate padding for alignment:
118+
119+```bx
120+type struct Point {
121+ x, y int32; // 8 bytes total
122+};
123+
124+type struct Mixed {
125+ flag bool; // 1 byte
126+ // 3 bytes padding
127+ value int32; // 4 bytes
128+ data int64; // 8 bytes
129+ // Total: 16 bytes
130+};
131+```
132+
133+The compiler automatically inserts padding to ensure proper alignment of structure members.
134+
135+### Pointer Representation
136+
137+Pointers are represented as machine addresses. On 64-bit systems, pointers are 8 bytes; on 32-bit systems, they are 4 bytes. Null pointers are represented as zero values.
138+
139+## Structured Data Types
140+
141+### Structures
142+
143+Structures group related data under a single name:
144+
145+```bx
146+type struct Person {
147+ name string;
148+ age int32;
149+ height float32;
150+};
151+
152+Person john = Person{
153+ name: "John Doe",
154+ age: 30,
155+ height: 5.9
156+};
157+
158+// Access members
159+printf("Name: %s, Age: %d\n", john.name, john.age);
160+```
161+
162+#### Structure Literals
163+
164+Structures can be initialized using literal syntax:
165+
166+```bx
167+// Named field initialization
168+Point p1 = Point{x: 10, y: 20};
169+
170+// Positional initialization (fields must be in order and all must be the same type)
171+Point p2 = Point{15, 25};
172+
173+// Partial initialization (remaining fields are zero or nil)
174+Point p3 = Point{x: 5}; // y is 0
175+```
176+
177+#### Nested Structures
178+
179+Structures can contain other structures:
180+
181+```bx
182+type struct Rectangle {
183+ top_left Point;
184+ bottom_right Point;
185+};
186+
187+Rectangle rect = Rectangle{
188+ top_left: Point{0, 0},
189+ bottom_right: Point{100, 50}
190+};
191+
192+// Access nested members
193+int width = rect.bottom_right.x - rect.top_left.x;
194+```
195+
196+### Enumerations
197+
198+Enumerations define named integer constants:
199+
200+```bx
201+type enum Color {
202+ RED, // 0
203+ GREEN, // 1
204+ BLUE // 2
205+};
206+
207+type enum Status {
208+ OK = 0,
209+ ERROR = -1,
210+ PENDING = 1
211+};
212+
213+Color background = RED;
214+Status result = OK;
215+```
216+
217+Enumerations are strongly typed, preventing accidental mixing of different enum types.
218+
219+## Literals and Escape Sequences
220+
221+### String and Character Literals
222+
223+Bx supports both string literals (enclosed in double quotes) and character literals (enclosed in single quotes):
224+
225+```bx
226+string message = "Hello, World!";
227+int newline_char = '\n';
228+```
229+
230+### Escape Sequences
231+
232+Arbitrary character values can be encoded with escape sequences and used in string or character literals. Bx supports both C-style and B-style escape sequences, which can be enabled simultaneously through feature flags.
233+
234+There are four different formats for arbitrary character values:
235+
236+- `\x` or `*x` followed by exactly two hexadecimal digits
237+- `\` or `*` followed by exactly three octal digits
238+- `\u` or `*u` followed by exactly four hexadecimal digits
239+- `\U` or `*U` followed by exactly eight hexadecimal digits
240+
241+where the escapes `\u`/`*u` and `\U`/`*U` represent Unicode code points.
242+
243+The following special escape values are also available:
244+
245+| Value | Description |
246+|-------|-------------|
247+| `\a` or `*a` | Alert or bell |
248+| `\b` or `*b` | Backspace |
249+| `\\` or `**` | Backslash or asterisk |
250+| `\t` or `*t` | Horizontal tab |
251+| `\n` or `*n` | Line feed or newline |
252+| `\f` or `*f` | Form feed |
253+| `\r` or `*r` | Carriage return |
254+| `\v` or `*v` | Vertical tab |
255+| `\'` or `*'` | Single quote (only in character literals) |
256+| `\"` or `*"` | Double quote (only in string literals) |
257+| `\e` or `*e` | End-of-file |
258+| `\0` or `*0` | Null character |
259+| `\(` or `*(` | Left brace `{` |
260+| `\)` or `*)` | Right brace `}` |
261+
262+#### Usage Examples
263+
264+```bx
265+// C-style escapes (when -Fc-esc is enabled)
266+string c_style = "Line 1\nLine 2\tTabbed\x41\101"; // Mixed hex, octal
267+int escape_char = '\033'; // Octal ESC character
268+
269+// B-style escapes (when -Fb-esc is enabled)
270+string b_style = "Line 1*nLine 2*tTabbed*x42"; // B-style newline, tab, hex
271+string braces = "Function *( body *)"; // B-style braces
272+
273+// Both styles can be mixed when both are enabled
274+string mixed = "C-style: \n B-style: *n"; // Both newlines
275+string unicode = "Unicode: \u0041 or *u0041"; // Both Unicode escapes
276+```
277+
278+#### Feature Flags
279+
280+Escape sequence behavior is controlled through compiler feature flags:
281+
282+| Flag | Description |
283+|------|-------------|
284+| `-Fc-esc` | Enable C-style escape sequences using `\` prefix (default: enabled) |
285+| `-Wc-esc` | Enable warnings for C-style escape sequences |
286+| `-Fb-esc` | Enable B-style escape sequences using `*` prefix (default: disabled) |
287+| `-Wb-esc` | Enable warnings for B-style escape sequences |
288+
289+Both C-style and B-style escape sequences support the same set of escape values and can be enabled simultaneously.
290+
291+```bash
292+# Enable both escape styles with warnings
293+gbc -Fc-esc -Fb-esc -Wc-esc -Wb-esc program.bx
294+
295+# Use only B-style escapes
296+gbc -Fno-c-esc -Fb-esc program.bx
297+```
298+
299+## Operators and Expressions
300+
301+### Arithmetic Operators
302+
303+Bx supports both C-style operators and legacy B-style operators. The B-style forms are a legacy feature and must be enabled explicitly with `-Fb-ops` or by selecting `-std=B`:
304+
305+```bx
306+// Basic arithmetic
307+int a = 10 + 5; // Addition
308+int b = 10 - 5; // Subtraction
309+int c = 10 * 5; // Multiplication
310+int d = 10 / 5; // Division
311+int e = 10 % 3; // Modulus
312+
313+// Compound assignment (C-style)
314+a += 5; // a = a + 5
315+b -= 3; // b = b - 3
316+c *= 2; // c = c * 2
317+
318+// B-style (deprecated, warns when used)
319+a =+ 5; // Equivalent to a += 5
320+b =- 3; // Equivalent to b -= 3
321+```
322+
323+### Bitwise Operators
324+
325+```bx
326+int flags = 0x0F;
327+flags = flags & 0x07; // Bitwise AND
328+flags = flags | 0x08; // Bitwise OR
329+flags = flags ^ 0x04; // Bitwise XOR
330+flags = flags << 1; // Left shift
331+flags = flags >> 1; // Right shift
332+```
333+
334+### Comparison and Logical Operators
335+
336+```bx
337+bool result;
338+result = (a == b); // Equality
339+result = (a != b); // Inequality
340+result = (a < b); // Less than
341+result = (a <= b); // Less than or equal
342+result = (a > b); // Greater than
343+result = (a >= b); // Greater than or equal
344+
345+result = (a && b); // Logical AND
346+result = (a || b); // Logical OR
347+result = !a; // Logical NOT
348+```
349+
350+### Pointer and Array Operators
351+
352+```bx
353+int arr[10] = {1, 2, 3, 4, 5};
354+int *ptr = arr;
355+
356+// Indirection and address-of
357+int value = *ptr; // Dereference pointer
358+ptr = &arr[5]; // Address of arr[5]
359+
360+// Array subscripting
361+arr[0] = 100; // Set first element
362+int first = arr[0]; // Get first element
363+
364+// Pointer arithmetic
365+ptr++; // Move to next element
366+ptr += 5; // Move 5 elements forward
367+```
368+
369+### Ternary Operator
370+
371+The conditional operator provides concise conditional expressions:
372+
373+```bx
374+int max_value = (a > b) ? a : b;
375+string message = (count > 0) ? "items found" : "no items";
376+```
377+
378+## Control Flow
379+
380+### Conditional Statements
381+
382+```bx
383+// Simple if statement
384+if (condition) {
385+ // statements
386+}
387+
388+// If-else
389+if (x > 0) {
390+ printf("Positive\n");
391+} else if (x < 0) {
392+ printf("Negative\n");
393+} else {
394+ printf("Zero\n");
395+}
396+```
397+
398+### Loop Constructs
399+
400+#### While Loops
401+
402+```bx
403+int i = 0;
404+while (i < 10) {
405+ printf("%d\n", i);
406+ i++;
407+}
408+```
409+
410+Note: Bx primarily uses `while` loops for iteration. Traditional `for` loops are not supported yet; once they are added, they will follow Go's `for` syntax.
411+
412+### Switch Statements
413+
414+Switch statements support conditions, literals and enum values:
415+
416+```bx
417+switch (value) {
418+ case 1:
419+ printf("One\n");
420+ break;
421+ case 2:
422+ case 3:
423+ printf("Two or Three\n");
424+ break;
425+ case 4, 5:
426+ printf("4 or 5");
427+ break;
428+ default:
429+ printf("Other\n");
430+ break;
431+}
432+```
433+
434+### Loop Control
435+
436+```bx
437+while (condition) {
438+ if (skip_condition) {
439+ continue; // Skip to next iteration
440+ }
441+
442+ if (exit_condition) {
443+ break; // Exit loop
444+ }
445+
446+ // Regular processing
447+}
448+```
449+
450+### Goto Statements
451+
452+Bx supports goto for low-level control flow:
453+
454+```bx
455+start:
456+ if (condition) {
457+ goto end;
458+ }
459+ // processing
460+ goto start;
461+
462+end:
463+ return (result);
464+```
465+
466+## Functions
467+
468+### Function Declarations
469+
470+Functions can be declared with or without type annotations:
471+
472+```bx
473+// Typed function declaration
474+int32 add(a, b int32) {
475+ return (a + b);
476+}
477+
478+// Mixed parameter types
479+void process_data(name string, count int, factor float32) {
480+ // Implementation
481+}
482+
483+// Untyped (B-style)
484+add(a, b) {
485+ return (a + b);
486+}
487+
488+// Void function
489+void print_message(msg string) {
490+ printf("%s\n", msg);
491+}
492+```
493+
494+### External Functions
495+
496+External functions are declared using the `extrn` keyword:
497+
498+```bx
499+extrn printf, malloc, free;
500+
501+// Typed external declarations
502+int extrn strlen;
503+void* extrn malloc;
504+void extrn free;
505+tm* extrn localtime; // see ./examples/cal.bx for an example
506+```
507+
508+### Function Pointers
509+
510+Functions can be treated as first-class values:
511+
512+```bx
513+int (*operation)(int, int) = add;
514+int result = operation(5, 3); // Calls add(5, 3)
515+```
516+
517+### Variadic Functions
518+
519+Functions can accept variable numbers of arguments:
520+
521+```bx
522+void log_message(string format, ...) {
523+ // Impl...
524+}
525+```
526+
527+## Directives and Feature Control
528+
529+### Inline Directives
530+
531+Bx supports inline directives for fine-grained feature control:
532+
533+```bx
534+// [b]: requires: -Ftyped -Fno-strict-decl
535+auto value = get_untyped_value();
536+
537+// [b]: requires: -Wno-type
538+mixed_operation(int_var, float_var);
539+```
540+
541+### Common Feature Flags
542+
543+| Flag | Description | Default |
544+|------|-------------|---------|
545+| `typed` | Enable type system | On |
546+| `float` | Enable floating-point | On |
547+| `c-comments` | Allow // comments | On |
548+| `c-ops` | Use C-style operators | On |
549+| `short-decl` | Enable := syntax | On |
550+| `continue` | Allow continue statement | On |
551+| `strict-decl` | Require initialization | Off |
552+
553+### Warning Control
554+
555+```bx
556+// Disable specific warnings for a section
557+// [b]: requires: -Wno-type -Wno-implicit-decl
558+legacy_code_section();
559+
560+// Enable pedantic warnings
561+// [b]: requires: -Wextra -Wpedantic
562+critical_function();
563+```
564+
565+## Low-Level Details
566+
567+### Calling Conventions
568+
569+Bx follows platform-specific calling conventions:
570+
571+- **System V AMD64 ABI** (e.g., Linux)
572+- **Microsoft x64 ABI** on Windows
573+- **AAPCS** on ARM platforms
574+
575+Function arguments are passed in registers when possible, with overflow on the stack.
576+
577+### Stack Frame Layout
578+
579+```
580+Higher addresses
581++------------------+
582+| Return address |
583++------------------+
584+| Saved registers |
585++------------------+
586+| Local variables |
587++------------------+
588+| Spill area |
589++------------------+
590+Lower addresses
591+```
592+
593+### Memory Model
594+
595+Bx uses a simple memory model:
596+- Global variables are allocated in the data segment
597+- Local variables use stack allocation
598+- Dynamic allocation uses heap (via `malloc`/`free`)
599+- Pointer arithmetic follows byte addressing
600+
601+### Floating-Point Representation
602+
603+Bx uses IEEE 754 standard for floating-point numbers:
604+- `float32`: 32-bit single precision
605+- `float64`: 64-bit double precision
606+- Platform `float` maps to the type that matches the machine's word size
607+
608+## Comparison with Classical B
609+
610+| Feature | Classical B | Bx |
611+|---------|------------|-----|
612+| Types | Untyped words | Optional type system |
613+| Operators | `=+`, `=-`, etc. | `+=`, `-=`, etc. (C-style) |
614+| Comments | `/* */` only | `//` and `/* */` |
615+| Data structures | Arrays only | Arrays, structs, enums |
616+| Floating-point | Not supported | Full IEEE 754 support |
617+| Control flow | Basic | Enhanced with `continue` |
618+
619+## Implementation Notes
620+
621+### Compiler Architecture
622+
623+The GBC compiler implements Bx as a superset of B using a multi-pass approach:
624+
625+1. **Lexical Analysis**: Tokenizes source with feature-aware scanning
626+2. **Parsing**: Builds AST with optional type annotations
627+3. **Type Checking**: Optional pass for type validation
628+4. **Code Generation**: Emits a backend-agnostic intermediate representation (IR)
629+5. **Backend**: QBE/LLVM/etc handles optimization and native code generation
630+
+1485,
-0
1@@ -0,0 +1,1485 @@
2+### [Module-Level Inline Assembly](https://llvm.org/docs/LangRef.html#id2008)[¶](https://llvm.org/docs/LangRef.html#module-level-inline-assembly "Link to this heading")
3+
4+Modules may contain “module-level inline asm” blocks, which correspond to the GCC “file scope inline asm” blocks. These blocks are internally concatenated by LLVM and treated as a single unit, but may be separated in the `.ll` file if desired. The syntax is very simple:
5+
6+```
7+module asm "inline asm code goes here"
8+module asm "more can go here"
9+```
10+
11+The strings can contain any character by escaping non-printable characters. The escape sequence used is simply “\\xx” where “xx” is the two digit hex code for the number.
12+
13+Note that the assembly string _must_ be parseable by LLVM’s integrated assembler (unless it is disabled), even when emitting a `.s` file.
14+
15+### [Data Layout](https://llvm.org/docs/LangRef.html#id2009)[¶](https://llvm.org/docs/LangRef.html#data-layout "Link to this heading")
16+
17+A module may specify a target-specific data layout string that specifies how data is to be laid out in memory. The syntax for the data layout is simply:
18+
19+```
20+target datalayout = "layout specification"
21+```
22+
23+The _layout specification_ consists of a list of specifications separated by the minus sign character (‘-’). Each specification starts with a letter and may include other information after the letter to define some aspect of the data layout. The specifications accepted are as follows:
24+
25+`E`
26+
27+Specifies that the target lays out data in big-endian form. That is, the bits with the most significance have the lowest address location.
28+
29+`e`
30+
31+Specifies that the target lays out data in little-endian form. That is, the bits with the least significance have the lowest address location.
32+
33+`S<size>`
34+
35+Specifies the natural alignment of the stack in bits. Alignment promotion of stack variables is limited to the natural stack alignment to avoid dynamic stack realignment. If omitted, the natural stack alignment defaults to “unspecified”, which does not prevent any alignment promotions.
36+
37+`P<address space>`
38+
39+Specifies the address space that corresponds to program memory. Harvard architectures can use this to specify what space LLVM should place things such as functions into. If omitted, the program memory space defaults to the default address space of 0, which corresponds to a Von Neumann architecture that has code and data in the same space.
40+
41+`G<address space>`
42+
43+Specifies the address space to be used by default when creating global variables. If omitted, the globals address space defaults to the default address space 0. Note: variable declarations without an address space are always created in address space 0, this property only affects the default value to be used when creating globals without additional contextual information (e.g. in LLVM passes).
44+
45+`A<address space>`
46+
47+Specifies the address space of objects created by ‘`alloca`’. Defaults to the default address space of 0.
48+
49+`p[n]:<size>:<abi>[:<pref>[:<idx>]]`
50+
51+This specifies the properties of a pointer in address space `n`. The `<size>` parameter specifies the size of the bitwise representation. For [non-integral pointers](https://llvm.org/docs/LangRef.html#nointptrtype) the representation size may be larger than the address width of the underlying address space (e.g. to accommodate additional metadata). The alignment requirements are specified via the `<abi>` and `<pref>` (preferred) alignment parameters. The fourth parameter `<idx>` is the size of the index that is used for address calculations such as [getelementptr](https://llvm.org/docs/LangRef.html#i-getelementptr). It must be less than or equal to the pointer size. If not specified, the default index size is equal to the pointer size. The index size also specifies the width of addresses in this address space. All sizes are in bits. The address space, `n`, is optional, and if not specified, denotes the default address space 0. The value of `n` must be in the range \[1,2^24).
52+
53+`i<size>:<abi>[:<pref>]`
54+
55+This specifies the alignment for an integer type of a given bit `<size>`. The value of `<size>` must be in the range \[1,2^24). For `i8`, the `<abi>` value must equal 8, that is, `i8` must be naturally aligned.
56+
57+`v<size>:<abi>[:<pref>]`
58+
59+This specifies the alignment for a vector type of a given bit `<size>`. The value of `<size>` must be in the range \[1,2^24).
60+
61+`f<size>:<abi>[:<pref>]`
62+
63+This specifies the alignment for a floating-point type of a given bit `<size>`. Only values of `<size>` that are supported by the target will work. 32 (float) and 64 (double) are supported on all targets; 80 or 128 (different flavors of long double) are also supported on some targets. The value of `<size>` must be in the range \[1,2^24).
64+
65+`a:<abi>[:<pref>]`
66+
67+This specifies the alignment for an object of aggregate type. In addition to the usual requirements for alignment values, the value of `<abi>` can also be zero, which means one byte alignment.
68+
69+`F<type><abi>`
70+
71+This specifies the alignment for function pointers. The options for `<type>` are:
72+
73+- `i`: The alignment of function pointers is independent of the alignment of functions, and is a multiple of `<abi>`.
74+
75+- `n`: The alignment of function pointers is a multiple of the explicit alignment specified on the function, and is a multiple of `<abi>`.
76+
77+
78+`m:<mangling>`
79+
80+If present, specifies that LLVM names are mangled in the output. Symbols prefixed with the mangling escape character `\01` are passed through directly to the assembler without the escape character. The mangling style options are:
81+
82+- `e`: ELF mangling: Private symbols get a `.L` prefix.
83+
84+- `l`: GOFF mangling: Private symbols get a `@` prefix.
85+
86+- `m`: Mips mangling: Private symbols get a `$` prefix.
87+
88+- `o`: Mach-O mangling: Private symbols get an `L` prefix. Other symbols get a `_` prefix.
89+
90+- `x`: Windows x86 COFF mangling: Private symbols get the usual prefix. Regular C symbols get a `_` prefix. Functions with `__stdcall`, `__fastcall`, and `__vectorcall` have custom mangling that appends `@N` where N is the number of bytes used to pass parameters. C++ symbols starting with `?` are not mangled in any way.
91+
92+- `w`: Windows COFF mangling: Similar to `x`, except that normal C symbols do not receive a `_` prefix.
93+
94+- `a`: XCOFF mangling: Private symbols get an `L..` prefix.
95+
96+
97+`n<size1>:<size2>:<size3>...`
98+
99+This specifies a set of native integer widths for the target CPU in bits. For example, it might contain `n32` for 32-bit PowerPC, `n32:64` for PowerPC 64, or `n8:16:32:64` for X86-64. Elements of this set are considered to support most general arithmetic operations efficiently.
100+
101+`ni:<address space0>:<address space1>:<address space2>...`
102+
103+This specifies pointer types with the specified address spaces as [Non-Integral Pointer Types](https://llvm.org/docs/LangRef.html#nointptrtype). The `0` address space cannot be specified as non-integral.
104+
105+`<abi>` is a lower bound on what is required for a type to be considered aligned. This is used in various places, such as:
106+
107+- The alignment for loads and stores if none is explicitly given.
108+
109+- The alignment used to compute struct layout.
110+
111+- The alignment used to compute allocation sizes and thus `getelementptr` offsets.
112+
113+- The alignment below which accesses are considered underaligned.
114+
115+
116+`<pref>` allows providing a more optimal alignment that should be used when possible, primarily for `alloca` and the alignment of global variables. It is an optional value that must be greater than or equal to `<abi>`. If omitted, the preceding `:` should also be omitted and `<pref>` will be equal to `<abi>`.
117+
118+Unless explicitly stated otherwise, every alignment specification is provided in bits and must be in the range \[1,2^16). The value must be a power of two times the width of a byte (i.e., `align = 8 * 2^N`).
119+
120+When constructing the data layout for a given target, LLVM starts with a default set of specifications which are then (possibly) overridden by the specifications in the `datalayout` keyword. The default specifications are given in this list:
121+
122+- `e` - little endian
123+
124+- `p:64:64:64` - 64-bit pointers with 64-bit alignment.
125+
126+- `p[n]:64:64:64` - Other address spaces are assumed to be the same as the default address space.
127+
128+- `S0` - natural stack alignment is unspecified
129+
130+- `i1:8:8` - i1 is 8-bit (byte) aligned
131+
132+- `i8:8:8` - i8 is 8-bit (byte) aligned as mandated
133+
134+- `i16:16:16` - i16 is 16-bit aligned
135+
136+- `i32:32:32` - i32 is 32-bit aligned
137+
138+- `i64:32:64` - i64 has ABI alignment of 32 bits but preferred alignment of 64 bits
139+
140+- `f16:16:16` - half is 16-bit aligned
141+
142+- `f32:32:32` - float is 32-bit aligned
143+
144+- `f64:64:64` - double is 64-bit aligned
145+
146+- `f128:128:128` - quad is 128-bit aligned
147+
148+- `v64:64:64` - 64-bit vector is 64-bit aligned
149+
150+- `v128:128:128` - 128-bit vector is 128-bit aligned
151+
152+- `a:0:64` - aggregates are 64-bit aligned
153+
154+
155+When LLVM is determining the alignment for a given type, it uses the following rules:
156+
157+1. If the type sought is an exact match for one of the specifications, that specification is used.
158+
159+2. If no match is found, and the type sought is an integer type, then the smallest integer type that is larger than the bitwidth of the sought type is used. If none of the specifications are larger than the bitwidth then the largest integer type is used. For example, given the default specifications above, the i7 type will use the alignment of i8 (next largest) while both i65 and i256 will use the alignment of i64 (largest specified).
160+
161+
162+The function of the data layout string may not be what you expect. Notably, this is not a specification from the frontend of what alignment the code generator should use.
163+
164+Instead, if specified, the target data layout is required to match what the ultimate _code generator_ expects. This string is used by the mid-level optimizers to improve code, and this only works if it matches what the ultimate code generator uses. There is no way to generate IR that does not embed this target-specific detail into the IR. If you don’t specify the string, the default specifications will be used to generate a Data Layout and the optimization phases will operate accordingly and introduce target specificity into the IR with respect to these default specifications.
165+
166+### [Target Triple](https://llvm.org/docs/LangRef.html#id2010)[¶](https://llvm.org/docs/LangRef.html#target-triple "Link to this heading")
167+
168+A module may specify a target triple string that describes the target host. The syntax for the target triple is simply:
169+
170+```
171+target triple = "x86_64-apple-macosx10.7.0"
172+```
173+
174+The _target triple_ string consists of a series of identifiers delimited by the minus sign character (‘-’). The canonical forms are:
175+
176+```
177+ARCHITECTURE-VENDOR-OPERATING_SYSTEM
178+ARCHITECTURE-VENDOR-OPERATING_SYSTEM-ENVIRONMENT
179+```
180+
181+This information is passed along to the backend so that it generates code for the proper architecture. It’s possible to override this on the command line with the `-mtriple` command-line option.
182+
183+### [Allocated Objects](https://llvm.org/docs/LangRef.html#id2011)[¶](https://llvm.org/docs/LangRef.html#allocated-objects "Link to this heading")
184+
185+An allocated object, memory object, or simply object, is a region of a memory space that is reserved by a memory allocation such as [alloca](https://llvm.org/docs/LangRef.html#i-alloca), heap allocation calls, and global variable definitions. Once it is allocated, the bytes stored in the region can only be read or written through a pointer that is [based on](https://llvm.org/docs/LangRef.html#pointeraliasing) the allocation value. If a pointer that is not based on the object tries to read or write to the object, it is undefined behavior.
186+
187+The following properties hold for all allocated objects, otherwise the behavior is undefined:
188+
189+- no allocated object may cross the unsigned address space boundary (including the pointer after the end of the object),
190+
191+- the size of all allocated objects must be non-negative and not exceed the largest signed integer that fits into the index type.
192+
193+
194+Allocated objects that are created with operations recognized by LLVM (such as [alloca](https://llvm.org/docs/LangRef.html#i-alloca), heap allocation functions marked as such, and global variables) may _not_ change their size. (`realloc`\-style operations do not change the size of an existing allocated object; instead, they create a new allocated object. Even if the object is at the same location as the old one, old pointers cannot be used to access this new object.) However, allocated objects can also be created by means not recognized by LLVM, e.g. by directly calling `mmap`. Those allocated objects are allowed to grow to the right (i.e., keeping the same base address, but increasing their size) while maintaining the validity of existing pointers, as long as they always satisfy the properties described above. Currently, allocated objects are not permitted to grow to the left or to shrink, nor can they have holes.
195+
196+### [Object Lifetime](https://llvm.org/docs/LangRef.html#id2012)[¶](https://llvm.org/docs/LangRef.html#object-lifetime "Link to this heading")
197+
198+A lifetime of an [allocated object](https://llvm.org/docs/LangRef.html#allocatedobjects) is a property that decides its accessibility. Unless stated otherwise, an allocated object is alive since its allocation, and dead after its deallocation. It is undefined behavior to access an allocated object that isn’t alive, but operations that don’t dereference it such as [getelementptr](https://llvm.org/docs/LangRef.html#i-getelementptr), [ptrtoint](https://llvm.org/docs/LangRef.html#i-ptrtoint) and [icmp](https://llvm.org/docs/LangRef.html#i-icmp) return a valid result. This explains code motion of these instructions across operations that impact the object’s lifetime. A stack object’s lifetime can be explicitly specified using [llvm.lifetime.start](https://llvm.org/docs/LangRef.html#int-lifestart) and [llvm.lifetime.end](https://llvm.org/docs/LangRef.html#int-lifeend) intrinsic function calls.
199+
200+### [Pointer Aliasing Rules](https://llvm.org/docs/LangRef.html#id2013)[¶](https://llvm.org/docs/LangRef.html#pointer-aliasing-rules "Link to this heading")
201+
202+Any memory access must be done through a pointer value associated with an address range of the memory access, otherwise the behavior is undefined. Pointer values are associated with address ranges according to the following rules:
203+
204+- A pointer value is associated with the addresses associated with any value it is _based_ on.
205+
206+- An address of a global variable is associated with the address range of the variable’s storage.
207+
208+- The result value of an allocation instruction is associated with the address range of the allocated storage.
209+
210+- A null pointer in the default address-space is associated with no address.
211+
212+- An [undef value](https://llvm.org/docs/LangRef.html#undefvalues) in _any_ address-space is associated with no address.
213+
214+- An integer constant other than zero or a pointer value returned from a function not defined within LLVM may be associated with address ranges allocated through mechanisms other than those provided by LLVM. Such ranges shall not overlap with any ranges of addresses allocated by mechanisms provided by LLVM.
215+
216+
217+A pointer value is _based_ on another pointer value according to the following rules:
218+
219+- A pointer value formed from a scalar `getelementptr` operation is _based_ on the pointer-typed operand of the `getelementptr`.
220+
221+- The pointer in lane _l_ of the result of a vector `getelementptr` operation is _based_ on the pointer in lane _l_ of the vector-of-pointers-typed operand of the `getelementptr`.
222+
223+- The result value of a `bitcast` is _based_ on the operand of the `bitcast`.
224+
225+- A pointer value formed by an `inttoptr` is _based_ on all pointer values that contribute (directly or indirectly) to the computation of the pointer’s value.
226+
227+- The “_based_ on” relationship is transitive.
228+
229+
230+Note that this definition of _“based”_ is intentionally similar to the definition of _“based”_ in C99, though it is slightly weaker.
231+
232+LLVM IR does not associate types with memory. The result type of a `load` merely indicates the size and alignment of the memory from which to load, as well as the interpretation of the value. The first operand type of a `store` similarly only indicates the size and alignment of the store.
233+
234+Consequently, type-based alias analysis, aka TBAA, aka `-fstrict-aliasing`, is not applicable to general unadorned LLVM IR. [Metadata](https://llvm.org/docs/LangRef.html#metadata) may be used to encode additional information which specialized optimization passes may use to implement type-based alias analysis.
235+
236+### [Pointer Capture](https://llvm.org/docs/LangRef.html#id2014)[¶](https://llvm.org/docs/LangRef.html#pointer-capture "Link to this heading")
237+
238+Given a function call and a pointer that is passed as an argument or stored in memory before the call, the call may capture two components of the pointer:
239+
240+> - The address of the pointer, which is its integral value. This also includes parts of the address or any information about the address, including the fact that it does not equal one specific value. We further distinguish whether only the fact that the address is/isn’t null is captured.
241+>
242+> - The provenance of the pointer, which is the ability to perform memory accesses through the pointer, in the sense of the [pointer aliasing rules](https://llvm.org/docs/LangRef.html#pointeraliasing). We further distinguish whether only read accesses are allowed, or both reads and writes.
243+>
244+
245+For example, the following function captures the address of `%a`, because it is compared to a pointer, leaking information about the identity of the pointer:
246+
247+```
248+<span></span><span class="vg">@glb</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">global</span><span class="w"> </span><span class="kt">i8</span><span class="w"> </span><span class="m">0</span>
249+
250+<span class="k">define</span><span class="w"> </span><span class="kt">i1</span><span class="w"> </span><span class="vg">@f</span><span class="p">(</span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%a</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
251+<span class="w"> </span><span class="nv">%c</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">icmp</span><span class="w"> </span><span class="k">eq</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%a</span><span class="p">,</span><span class="w"> </span><span class="vg">@glb</span>
252+<span class="w"> </span><span class="k">ret</span><span class="w"> </span><span class="kt">i1</span><span class="w"> </span><span class="nv">%c</span>
253+<span class="p">}</span>
254+```
255+
256+The function does not capture the provenance of the pointer, because the `icmp` instruction only operates on the pointer address. The following function captures both the address and provenance of the pointer, as both may be read from `@glb` after the function returns:
257+
258+```
259+<span></span><span class="vg">@glb</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">global</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="k">null</span>
260+
261+<span class="k">define</span><span class="w"> </span><span class="k">void</span><span class="w"> </span><span class="vg">@f</span><span class="p">(</span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%a</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
262+<span class="w"> </span><span class="k">store</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%a</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@glb</span>
263+<span class="w"> </span><span class="k">ret</span><span class="w"> </span><span class="k">void</span>
264+<span class="p">}</span>
265+```
266+
267+The following function captures _neither_ the address nor the provenance of the pointer:
268+
269+```
270+define i32 @f(ptr %a) {
271+  %v = load i32, ptr %a
272+  ret i32 %v
273+}
274+```
275+
276+While address capture includes uses of the address within the body of the function, provenance capture refers exclusively to the ability to perform accesses _after_ the function returns. Memory accesses within the function itself are not considered pointer captures.
277+
278+We can further say that the capture only occurs through a specific location. In the following example, the pointer (both address and provenance) is captured through the return value only:
279+
280+```
281+<span></span><span class="k">define</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@f</span><span class="p">(</span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%a</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
282+<span class="w"> </span><span class="nv">%gep</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">getelementptr</span><span class="w"> </span><span class="kt">i8</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%a</span><span class="p">,</span><span class="w"> </span><span class="kt">i64</span><span class="w"> </span><span class="m">4</span>
283+<span class="w"> </span><span class="k">ret</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%gep</span>
284+<span class="p">}</span>
285+```
286+
287+However, we always consider direct inspection of the pointer address (e.g. using `ptrtoint`) to be location-independent. The following example is _not_ considered a return-only capture, even though the `ptrtoint` ultimately only contributes to the return value:
288+
289+```
290+<span></span><span class="vg">@lookup</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">constant</span><span class="w"> </span><span class="p">[</span><span class="m">4</span><span class="w"> </span><span class="k">x</span><span class="w"> </span><span class="kt">i8</span><span class="p">]</span><span class="w"> </span><span class="p">[</span><span class="kt">i8</span><span class="w"> </span><span class="m">0</span><span class="p">,</span><span class="w"> </span><span class="kt">i8</span><span class="w"> </span><span class="m">1</span><span class="p">,</span><span class="w"> </span><span class="kt">i8</span><span class="w"> </span><span class="m">2</span><span class="p">,</span><span class="w"> </span><span class="kt">i8</span><span class="w"> </span><span class="m">3</span><span class="p">]</span>
291+
292+<span class="k">define</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@f</span><span class="p">(</span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%a</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
293+<span class="w"> </span><span class="nv">%a.addr</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">ptrtoint</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%a</span><span class="w"> </span><span class="k">to</span><span class="w"> </span><span class="kt">i64</span>
294+<span class="w"> </span><span class="nv">%mask</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">and</span><span class="w"> </span><span class="kt">i64</span><span class="w"> </span><span class="nv">%a.addr</span><span class="p">,</span><span class="w"> </span><span class="m">3</span>
295+<span class="w"> </span><span class="nv">%gep</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">getelementptr</span><span class="w"> </span><span class="kt">i8</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@lookup</span><span class="p">,</span><span class="w"> </span><span class="kt">i64</span><span class="w"> </span><span class="nv">%mask</span>
296+<span class="w"> </span><span class="k">ret</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%gep</span>
297+<span class="p">}</span>
298+```
299+
300+This definition is chosen to allow capture analysis to continue with the return value in the usual fashion.
301+
302+The following describes possible ways to capture a pointer in more detail, where unqualified uses of the word “capture” refer to capturing both address and provenance.
303+
304+1. The call stores any bit of the pointer carrying information into a place, and the stored bits can be read from the place by the caller after this call exits.
305+
306+
307+```
308+<span></span><span class="vg">@glb</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">global</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="k">null</span>
309+<span class="vg">@glb2</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">global</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="k">null</span>
310+<span class="vg">@glb3</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">global</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="k">null</span>
311+<span class="vg">@glbi</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">global</span><span class="w"> </span><span class="kt">i32</span><span class="w"> </span><span class="m">0</span>
312+
313+<span class="k">define</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@f</span><span class="p">(</span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%a</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%b</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%c</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%d</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
314+<span class="w"> </span><span class="k">store</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%a</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@glb</span><span class="w"> </span><span class="c">; %a is captured by this call</span>
315+
316+<span class="w"> </span><span class="k">store</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%b</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@glb2</span><span class="w"> </span><span class="c">; %b isn't captured because the stored value is overwritten by the store below</span>
317+<span class="w"> </span><span class="k">store</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="k">null</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@glb2</span>
318+
319+<span class="w"> </span><span class="k">store</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%c</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@glb3</span>
320+<span class="w"> </span><span class="k">call</span><span class="w"> </span><span class="k">void</span><span class="w"> </span><span class="vg">@g</span><span class="p">()</span><span class="w"> </span><span class="c">; If @g makes a copy of %c that outlives this call (@f), %c is captured</span>
321+<span class="w"> </span><span class="k">store</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="k">null</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@glb3</span>
322+
323+<span class="w"> </span><span class="nv">%i</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">ptrtoint</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%d</span><span class="w"> </span><span class="k">to</span><span class="w"> </span><span class="kt">i64</span>
324+<span class="w"> </span><span class="nv">%j</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">trunc</span><span class="w"> </span><span class="kt">i64</span><span class="w"> </span><span class="nv">%i</span><span class="w"> </span><span class="k">to</span><span class="w"> </span><span class="kt">i32</span>
325+<span class="w"> </span><span class="k">store</span><span class="w"> </span><span class="kt">i32</span><span class="w"> </span><span class="nv">%j</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@glbi</span><span class="w"> </span><span class="c">; %d is captured</span>
326+
327+<span class="w"> </span><span class="k">ret</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%e</span><span class="w"> </span><span class="c">; %e is captured</span>
328+<span class="p">}</span>
329+```
330+
331+2. The call stores any bit of the pointer carrying information into a place, and the stored bits can be safely read from the place by another thread via synchronization.
332+
333+
334+```
335+@lock = global i1 true
336+
337+define void @f(ptr %a) {
338+  store ptr %a, ptr @glb
339+  store atomic i1 false, ptr @lock release ; %a is captured because another thread can safely read @glb
340+  store ptr null, ptr @glb
341+  ret void
342+}
343+```
344+
345+3. The call’s behavior depends on any bit of the pointer carrying information (address capture only).
346+
347+
348+```
349+@glb = global i8 0
350+
351+define void @f(ptr %a) {
352+  %c = icmp eq ptr %a, @glb
353+  br i1 %c, label %BB_EXIT, label %BB_CONTINUE ; captures address of %a only
354+BB_EXIT:
355+  call void @exit()
356+  unreachable
357+BB_CONTINUE:
358+  ret void
359+}
360+```
361+
362+4. The pointer is used as the pointer operand of a volatile access.
363+
364+
365+### [Volatile Memory Accesses](https://llvm.org/docs/LangRef.html#id2015)[¶](https://llvm.org/docs/LangRef.html#volatile-memory-accesses "Link to this heading")
366+
367+Certain memory accesses, such as [load](https://llvm.org/docs/LangRef.html#i-load)’s, [store](https://llvm.org/docs/LangRef.html#i-store)’s, and [llvm.memcpy](https://llvm.org/docs/LangRef.html#int-memcpy)’s may be marked `volatile`. The optimizers must not change the number of volatile operations or change their order of execution relative to other volatile operations. The optimizers _may_ change the order of volatile operations relative to non-volatile operations. This is not Java’s “volatile” and has no cross-thread synchronization behavior.
368+
369+A volatile load or store may have additional target-specific semantics. Any volatile operation can have side effects, and any volatile operation can read and/or modify state which is not accessible via a regular load or store in this module. Volatile operations may use addresses which do not point to memory (like MMIO registers). This means the compiler may not use a volatile operation to prove a non-volatile access to that address has defined behavior. This includes addresses typically forbidden, such as the pointer with bit-value 0.
370+
371+The allowed side-effects for volatile accesses are limited. If a non-volatile store to a given address would be legal, a volatile operation may modify the memory at that address. A volatile operation may not modify any other memory accessible by the module being compiled. A volatile operation may not call any code in the current module.
372+
373+In general (without target-specific context), the address space of a volatile operation may not be changed. Different address spaces may have different trapping behavior when dereferencing an invalid pointer.
374+
375+The compiler may assume execution will continue after a volatile operation, so operations which modify memory or may have undefined behavior can be hoisted past a volatile operation.
376+
377+As an exception to the preceding rule, the compiler may not assume execution will continue after a volatile store operation. This restriction is necessary to support the somewhat common pattern in C of intentionally storing to an invalid pointer to crash the program. In the future, it might make sense to allow frontends to control this behavior.
378+
379+IR-level volatile loads and stores cannot safely be optimized into `llvm.memcpy` or `llvm.memmove` intrinsics even when those intrinsics are flagged volatile. Likewise, the backend should never split or merge target-legal volatile load/store instructions. Similarly, IR-level volatile loads and stores cannot change from integer to floating-point or vice versa.
380+
381+Rationale
382+
383+Platforms may rely on volatile loads and stores of natively supported data width to be executed as a single instruction. For example, in C this holds for an l-value of volatile primitive type with native hardware support, but not necessarily for aggregate types. The frontend upholds these expectations, which are intentionally unspecified in the IR. The rules above ensure that IR transformations do not violate the frontend’s contract with the language.
384+
385+### [Memory Model for Concurrent Operations](https://llvm.org/docs/LangRef.html#id2016)[¶](https://llvm.org/docs/LangRef.html#memory-model-for-concurrent-operations "Link to this heading")
386+
387+The LLVM IR does not define any way to start parallel threads of execution or to register signal handlers. Nonetheless, there are platform-specific ways to create them, and we define LLVM IR’s behavior in their presence. This model is inspired by the C++ memory model.
388+
389+For a more informal introduction to this model, see the [LLVM Atomic Instructions and Concurrency Guide](https://llvm.org/docs/Atomics.html).
390+
391+We define a _happens-before_ partial order as the least partial order that
392+
393+- Is a superset of single-thread program order, and
394+
395+- When `a` _synchronizes-with_ `b`, includes an edge from `a` to `b`. _Synchronizes-with_ pairs are introduced by platform-specific techniques, like pthread locks, thread creation, thread joining, etc., and by atomic instructions. (See also [Atomic Memory Ordering Constraints](https://llvm.org/docs/LangRef.html#ordering)).
396+
397+
398+Note that program order does not introduce _happens-before_ edges between a thread and signals executing inside that thread.
399+
400+Every (defined) read operation (load instructions, memcpy, atomic loads/read-modify-writes, etc.) R reads a series of bytes written by (defined) write operations (store instructions, atomic stores/read-modify-writes, memcpy, etc.). For the purposes of this section, initialized globals are considered to have a write of the initializer which is atomic and happens before any other read or write of the memory in question. For each byte of a read R, R<sub>byte</sub> may see any write to the same byte, except:
401+
402+- If write<sub>1</sub> happens before write<sub>2</sub>, and write<sub>2</sub> happens before R<sub>byte</sub>, then R<sub>byte</sub> does not see write<sub>1</sub>.
403+
404+- If R<sub>byte</sub> happens before write<sub>3</sub>, then R<sub>byte</sub> does not see write<sub>3</sub>.
405+
406+
407+Given that definition, R<sub>byte</sub> is defined as follows:
408+
409+- If R is volatile, the result is target-dependent. (Volatile is supposed to give guarantees which can support `sig_atomic_t` in C/C++, and may be used for accesses to addresses that do not behave like normal memory. It does not generally provide cross-thread synchronization.)
410+
411+- Otherwise, if there is no write to the same byte that happens before R<sub>byte</sub>, R<sub>byte</sub> returns `undef` for that byte.
412+
413+- Otherwise, if R<sub>byte</sub> may see exactly one write, R<sub>byte</sub> returns the value written by that write.
414+
415+- Otherwise, if R is atomic, and all the writes R<sub>byte</sub> may see are atomic, it chooses one of the values written. See the [Atomic Memory Ordering Constraints](https://llvm.org/docs/LangRef.html#ordering) section for additional constraints on how the choice is made.
416+
417+- Otherwise R<sub>byte</sub> returns `undef`.
418+
419+
420+R returns the value composed of the series of bytes it read. This implies that some bytes within the value may be `undef` **without** the entire value being `undef`. Note that this only defines the semantics of the operation; it doesn’t mean that targets will emit more than one instruction to read the series of bytes.
421+
422+Note that in cases where none of the atomic intrinsics are used, this model places only one restriction on IR transformations on top of what is required for single-threaded execution: introducing a store to a byte which might not otherwise be stored is not allowed in general. (Specifically, in the case where another thread might write to and read from an address, introducing a store can change a load that may see exactly one write into a load that may see multiple writes.)
423+
424+### [Atomic Memory Ordering Constraints](https://llvm.org/docs/LangRef.html#id2017)[¶](https://llvm.org/docs/LangRef.html#atomic-memory-ordering-constraints "Link to this heading")
425+
426+Atomic instructions ([cmpxchg](https://llvm.org/docs/LangRef.html#i-cmpxchg), [atomicrmw](https://llvm.org/docs/LangRef.html#i-atomicrmw), [fence](https://llvm.org/docs/LangRef.html#i-fence), [atomic load](https://llvm.org/docs/LangRef.html#i-load), and [atomic store](https://llvm.org/docs/LangRef.html#i-store)) take ordering parameters that determine which other atomic instructions on the same address they _synchronize with_. These semantics implement the Java or C++ memory models; if these descriptions aren’t precise enough, check those specs (see spec references in the [atomics guide](https://llvm.org/docs/Atomics.html)). [fence](https://llvm.org/docs/LangRef.html#i-fence) instructions treat these orderings somewhat differently since they don’t take an address. See that instruction’s documentation for details.
427+
428+For a simpler introduction to the ordering constraints, see the [LLVM Atomic Instructions and Concurrency Guide](https://llvm.org/docs/Atomics.html).
429+
430+`unordered`
431+
432+The set of values that can be read is governed by the happens-before partial order. A value cannot be read unless some operation wrote it. This is intended to provide a guarantee strong enough to model Java’s non-volatile shared variables. This ordering cannot be specified for read-modify-write operations; it is not strong enough to make them atomic in any interesting way.
433+
434+`monotonic`
435+
436+In addition to the guarantees of `unordered`, there is a single total order for modifications by `monotonic` operations on each address. All modification orders must be compatible with the happens-before order. There is no guarantee that the modification orders can be combined to a global total order for the whole program (and this often will not be possible). The read in an atomic read-modify-write operation ([cmpxchg](https://llvm.org/docs/LangRef.html#i-cmpxchg) and [atomicrmw](https://llvm.org/docs/LangRef.html#i-atomicrmw)) reads the value in the modification order immediately before the value it writes. If one atomic read happens before another atomic read of the same address, the later read must see the same value or a later value in the address’s modification order. This disallows reordering of `monotonic` (or stronger) operations on the same address. If an address is written `monotonic`\-ally by one thread, and other threads `monotonic`\-ally read that address repeatedly, the other threads must eventually see the write. This corresponds to the C/C++ `memory_order_relaxed`.
437+
438+`acquire`
439+
440+In addition to the guarantees of `monotonic`, a _synchronizes-with_ edge may be formed with a `release` operation. This is intended to model C/C++’s `memory_order_acquire`.
441+
442+`release`
443+
444+In addition to the guarantees of `monotonic`, if this operation writes a value which is subsequently read by an `acquire` operation, it _synchronizes-with_ that operation. Furthermore, this occurs even if the value written by a `release` operation has been modified by a read-modify-write operation before being read. (Such a set of operations comprises a _release sequence_). This corresponds to the C/C++ `memory_order_release`.
445+
446+`acq_rel` (acquire+release)
447+
448+Acts as both an `acquire` and `release` operation on its address. This corresponds to the C/C++ `memory_order_acq_rel`.
449+
450+`seq_cst` (sequentially consistent)
451+
452+In addition to the guarantees of `acq_rel` (`acquire` for an operation that only reads, `release` for an operation that only writes), there is a global total order on all sequentially-consistent operations on all addresses. Each sequentially-consistent read sees the last preceding write to the same address in this global order. This corresponds to the C/C++ `memory_order_seq_cst` and Java `volatile`.
453+
454+Note: this global total order is _not_ guaranteed to be fully consistent with the _happens-before_ partial order if non-`seq_cst` accesses are involved. See the C++ standard [\[atomics.order\]](https://wg21.link/atomics.order) section for more details on the exact guarantees.
455+
456+If an atomic operation is marked `syncscope("singlethread")`, it only _synchronizes with_ and only participates in the seq\_cst total orderings of other operations running in the same thread (for example, in signal handlers).
457+
458+If an atomic operation is marked `syncscope("<target-scope>")`, where `<target-scope>` is a target-specific synchronization scope, then it is target dependent if it _synchronizes with_ and participates in the seq\_cst total orderings of other operations.
459+
460+Otherwise, an atomic operation that is not marked `syncscope("singlethread")` or `syncscope("<target-scope>")` _synchronizes with_ and participates in the seq\_cst total orderings of other operations that are not marked `syncscope("singlethread")` or `syncscope("<target-scope>")`.
461+
462+### [Floating-Point Environment](https://llvm.org/docs/LangRef.html#id2018)[¶](https://llvm.org/docs/LangRef.html#floating-point-environment "Link to this heading")
463+
464+The default LLVM floating-point environment assumes that traps are disabled and status flags are not observable. Therefore, floating-point math operations do not have side effects and may be speculated freely. Results assume the round-to-nearest rounding mode, and subnormals are assumed to be preserved.
465+
466+Running LLVM code in an environment where these assumptions are not met typically leads to undefined behavior. The `strictfp` and `denormal-fp-math` attributes as well as [Constrained Floating-Point Intrinsics](https://llvm.org/docs/LangRef.html#constrainedfp) can be used to weaken LLVM’s assumptions and ensure defined behavior in non-default floating-point environments; see their respective documentation for details.
467+
468+### [Behavior of Floating-Point NaN values](https://llvm.org/docs/LangRef.html#id2019)[¶](https://llvm.org/docs/LangRef.html#behavior-of-floating-point-nan-values "Link to this heading")
469+
470+A floating-point NaN value consists of a sign bit, a quiet/signaling bit, and a payload (which makes up the rest of the mantissa except for the quiet/signaling bit). LLVM assumes that the quiet/signaling bit being set to `1` indicates a quiet NaN (QNaN), and a value of `0` indicates a signaling NaN (SNaN). In the following we will hence just call it the “quiet bit”.
471+
472+The representation bits of a floating-point value do not mutate arbitrarily; in particular, if there is no floating-point operation being performed, NaN signs, quiet bits, and payloads are preserved.
473+
474+For the purpose of this section, `bitcast` as well as the following operations are not “floating-point math operations”: `fneg`, `llvm.fabs`, and `llvm.copysign`. These operations act directly on the underlying bit representation and never change anything except possibly for the sign bit.
475+
476+Floating-point math operations that return a NaN are an exception from the general principle that LLVM implements IEEE-754 semantics. Unless specified otherwise, the following rules apply whenever the IEEE-754 semantics say that a NaN value is returned: the result has a non-deterministic sign; the quiet bit and payload are non-deterministically chosen from the following set of options:
477+
478+- The quiet bit is set and the payload is all-zero. (“Preferred NaN” case)
479+
480+- The quiet bit is set and the payload is copied from any input operand that is a NaN. (“Quieting NaN propagation” case)
481+
482+- The quiet bit and payload are copied from any input operand that is a NaN. (“Unchanged NaN propagation” case)
483+
484+- The quiet bit is set and the payload is picked from a target-specific set of “extra” possible NaN payloads. The set can depend on the input operand values. This set is empty on x86 and ARM, but can be non-empty on other architectures. (For instance, on wasm, if any input NaN does not have the preferred all-zero payload or any input NaN is an SNaN, then this set contains all possible payloads; otherwise, it is empty. On SPARC, this set consists of the all-one payload.)
485+
486+
487+In particular, if all input NaNs are quiet (or if there are no input NaNs), then the output NaN is definitely quiet. Signaling NaN outputs can only occur if they are provided as an input value. For example, “fmul SNaN, 1.0” may be simplified to SNaN rather than QNaN. Similarly, if all input NaNs are preferred (or if there are no input NaNs) and the target does not have any “extra” NaN payloads, then the output NaN is guaranteed to be preferred.
488+
489+Floating-point math operations are allowed to treat all NaNs as if they were quiet NaNs. For example, “pow(1.0, SNaN)” may be simplified to 1.0.
490+
491+Code that requires different behavior than this should use the [Constrained Floating-Point Intrinsics](https://llvm.org/docs/LangRef.html#constrainedfp). In particular, constrained intrinsics rule out the “Unchanged NaN propagation” case; they are guaranteed to return a QNaN.
492+
493+Unfortunately, due to hard-or-impossible-to-fix issues, LLVM violates its own specification on some architectures:
494+
495+- x86-32 without SSE2 enabled may convert floating-point values to x86\_fp80 and back when performing floating-point math operations; this can lead to results with different precision than expected and it can alter NaN values. Since optimizations can make contradicting assumptions, this can lead to arbitrary miscompilations. See [issue #44218](https://github.com/llvm/llvm-project/issues/44218).
496+
497+- x86-32 (even with SSE2 enabled) may implicitly perform such a conversion on values returned from a function for some calling conventions. See [issue #66803](https://github.com/llvm/llvm-project/issues/66803).
498+
499+- Older MIPS versions use the opposite polarity for the quiet/signaling bit, and LLVM does not correctly represent this. See [issue #60796](https://github.com/llvm/llvm-project/issues/60796).
500+
501+
502+### [Floating-Point Semantics](https://llvm.org/docs/LangRef.html#id2020)[¶](https://llvm.org/docs/LangRef.html#floating-point-semantics "Link to this heading")
503+
504+This section defines the semantics for core floating-point operations on types that use a format specified by IEEE-754. These types are: `half`, `float`, `double`, and `fp128`, which correspond to the binary16, binary32, binary64, and binary128 formats, respectively. The “core” operations are those defined in section 5 of IEEE-754, which all have corresponding LLVM operations.
505+
506+The value returned by those operations matches that of the corresponding IEEE-754 operation executed in the [default LLVM floating-point environment](https://llvm.org/docs/LangRef.html#floatenv), except that the behavior of NaN results is instead [as specified here](https://llvm.org/docs/LangRef.html#floatnan). In particular, such a floating-point instruction returning a non-NaN value is guaranteed to always return the same bit-identical result on all machines and optimization levels.
507+
508+This means that optimizations and backends may not change the observed bitwise result of these operations in any way (unless NaNs are returned), and frontends can rely on these operations providing correctly rounded results as described in the standard.
509+
510+(Note that this is only about the value returned by these operations; see the [floating-point environment section](https://llvm.org/docs/LangRef.html#floatenv) regarding flags and exceptions.)
511+
512+Various flags, attributes, and metadata can alter the behavior of these operations and thus make them not bit-identical across machines and optimization levels any more: most notably, the [fast-math flags](https://llvm.org/docs/LangRef.html#fastmath) as well as the [strictfp](https://llvm.org/docs/LangRef.html#strictfp) and [denormal-fp-math](https://llvm.org/docs/LangRef.html#denormal-fp-math) attributes and [fpmath metadata](https://llvm.org/docs/LangRef.html#fpmath-metadata). See their corresponding documentation for details.
513+
514+### [Fast-Math Flags](https://llvm.org/docs/LangRef.html#id2021)[¶](https://llvm.org/docs/LangRef.html#fast-math-flags "Link to this heading")
515+
516+LLVM IR floating-point operations ([fneg](https://llvm.org/docs/LangRef.html#i-fneg), [fadd](https://llvm.org/docs/LangRef.html#i-fadd), [fsub](https://llvm.org/docs/LangRef.html#i-fsub), [fmul](https://llvm.org/docs/LangRef.html#i-fmul), [fdiv](https://llvm.org/docs/LangRef.html#i-fdiv), [frem](https://llvm.org/docs/LangRef.html#i-frem), [fcmp](https://llvm.org/docs/LangRef.html#i-fcmp), [fptrunc](https://llvm.org/docs/LangRef.html#i-fptrunc), [fpext](https://llvm.org/docs/LangRef.html#i-fpext)), and [phi](https://llvm.org/docs/LangRef.html#i-phi), [select](https://llvm.org/docs/LangRef.html#i-select), or [call](https://llvm.org/docs/LangRef.html#i-call) instructions that return floating-point types may use the following flags to enable otherwise unsafe floating-point transformations.
517+
518+`fast`
519+
520+This flag is a shorthand for specifying all fast-math flags at once, and imparts no additional semantics from using all of them.
521+
522+`nnan`
523+
524+No NaNs - Allow optimizations to assume the arguments and result are not NaN. If an argument is a NaN, or the result would be a NaN, it produces a [poison value](https://llvm.org/docs/LangRef.html#poisonvalues) instead.
525+
526+`ninf`
527+
528+No Infs - Allow optimizations to assume the arguments and result are not +/-Inf. If an argument is +/-Inf, or the result would be +/-Inf, it produces a [poison value](https://llvm.org/docs/LangRef.html#poisonvalues) instead.
529+
530+`nsz`
531+
532+No Signed Zeros - Allow optimizations to treat the sign of a zero argument or zero result as insignificant. This does not imply that -0.0 is poison and/or guaranteed to not exist in the operation.
533+
534+Note: For [phi](https://llvm.org/docs/LangRef.html#i-phi), [select](https://llvm.org/docs/LangRef.html#i-select), and [call](https://llvm.org/docs/LangRef.html#i-call) instructions, the following return types are considered to be floating-point types:
535+
536+- Floating-point scalar or vector types
537+
538+- Array types (nested to any depth) of floating-point scalar or vector types
539+
540+- Homogeneous literal struct types of floating-point scalar or vector types
541+
542+
543+#### [Rewrite-based flags](https://llvm.org/docs/LangRef.html#id2022)[¶](https://llvm.org/docs/LangRef.html#rewrite-based-flags "Link to this heading")
544+
545+The following flags have rewrite-based semantics. These flags allow expressions, potentially containing multiple non-consecutive instructions, to be rewritten into alternative instructions. When multiple instructions are involved in an expression, it is necessary that all of the instructions have the necessary rewrite-based flag present on them, and the rewritten instructions will generally have the intersection of the flags present on the input instruction.
546+
547+In the following example, the floating-point expression in the body of `@orig` has `contract` and `reassoc` in common, and thus if it is rewritten into the expression in the body of `@target`, all of the new instructions get those two flags and only those flags as a result. Since the `arcp` is present on only one of the instructions in the expression, it is not present in the transformed expression. Furthermore, this reassociation here is only legal because both the instructions had the `reassoc` flag; if only one had it, it would not be legal to make the transformation.
548+
549+```
550+define double @orig(double %a, double %b, double %c) {
551+  %t1 = fmul contract reassoc double %a, %b
552+  %val = fmul contract reassoc arcp double %t1, %c
553+  ret double %val
554+}
555+
556+define double @target(double %a, double %b, double %c) {
557+  %t1 = fmul contract reassoc double %b, %c
558+  %val = fmul contract reassoc double %a, %t1
559+  ret double %val
560+}
561+```
562+
563+These rules do not apply to the other fast-math flags. Whether or not a flag like `nnan` is present on any or all of the rewritten instructions is based on whether or not it is possible for said instruction to have a NaN input or output, given the original flags.
564+
565+`arcp`
566+
567+Allows division to be treated as a multiplication by a reciprocal. Specifically, this permits `a / b` to be considered equivalent to `a * (1.0 / b)` (which may subsequently be susceptible to code motion), and it also permits `a / (b / c)` to be considered equivalent to `a * (c / b)`. Both of these rewrites can be applied in either direction: `a * (c / b)` can be rewritten into `a / (b / c)`.
568+
569+`contract`
570+
571+Allow floating-point contraction (e.g. fusing a multiply followed by an addition into a fused multiply-and-add). This does not enable reassociation to form arbitrary contractions. For example, `(a*b) + (c*d) + e` cannot be transformed into `(a*b) + ((c*d) + e)` to create two fma operations.
572+
573+`afn`
574+
575+Approximate functions - Allow substitution of approximate calculations for functions (sin, log, sqrt, etc). See floating-point intrinsic definitions for places where this can apply to LLVM’s intrinsic math functions.
576+
577+`reassoc`
578+
579+Allow algebraically equivalent transformations for floating-point instructions such as reassociation transformations. This may dramatically change results in floating-point.
580+
581+### [Use-list Order Directives](https://llvm.org/docs/LangRef.html#id2023)[¶](https://llvm.org/docs/LangRef.html#use-list-order-directives "Link to this heading")
582+
583+Use-list directives encode the in-memory order of each use-list, allowing the order to be recreated. `<order-indexes>` is a comma-separated list of indexes that are assigned to the referenced value’s uses. The referenced value’s use-list is immediately sorted by these indexes.
584+
585+Use-list directives may appear at function scope or global scope. They are not instructions, and have no effect on the semantics of the IR. When they’re at function scope, they must appear after the terminator of the final basic block.
586+
587+If basic blocks have their address taken via `blockaddress()` expressions, `uselistorder_bb` can be used to reorder their use-lists from outside their function’s scope.
588+
589+Syntax:
590+
591+```
592+uselistorder <ty> <value>, { <order-indexes> }
593+uselistorder_bb @function, %block { <order-indexes> }
594+```
595+
596+Examples:
597+
598+```
599+define void @foo(i32 %arg1, i32 %arg2) {
600+entry:
601+ ; ... instructions ...
602+bb:
603+ ; ... instructions ...
604+
605+ ; At function scope.
606+ uselistorder i32 %arg1, { 1, 0, 2 }
607+ uselistorder label %bb, { 1, 0 }
608+}
609+
610+; At global scope.
611+uselistorder ptr @global, { 1, 2, 0 }
612+uselistorder i32 7, { 1, 0 }
613+uselistorder i32 (i32) @bar, { 1, 0 }
614+uselistorder_bb @foo, %bb, { 5, 1, 3, 2, 0, 4 }
615+```
616+
617+### [Source Filename](https://llvm.org/docs/LangRef.html#id2024)[¶](https://llvm.org/docs/LangRef.html#source-filename "Link to this heading")
618+
619+The _source filename_ string is set to the original module identifier, which will be the name of the compiled source file when compiling from source through the clang front end, for example. It is then preserved through the IR and bitcode.
620+
621+This is currently necessary to generate a consistent unique global identifier for local functions used in profile data, which prepends the source file name to the local function name.
622+
623+The syntax for the source file name is simply:
624+
625+```
626+source_filename = "/path/to/source.c"
627+```
628+
629+## [Type System](https://llvm.org/docs/LangRef.html#id2025)[¶](https://llvm.org/docs/LangRef.html#type-system "Link to this heading")
630+
631+The LLVM type system is one of the most important features of the intermediate representation. Being typed enables a number of optimizations to be performed on the intermediate representation directly, without having to do extra analyses on the side before the transformation. A strong type system makes it easier to read the generated code and enables novel analyses and transformations that are not feasible to perform on normal three address code representations.
632+
633+### [Void Type](https://llvm.org/docs/LangRef.html#id2026)[¶](https://llvm.org/docs/LangRef.html#void-type "Link to this heading")
634+
635+Overview:
636+
637+The void type does not represent any value and has no size.
638+
639+Syntax:
640+
641+```
642+void
643+```
644+
645+### [Function Type](https://llvm.org/docs/LangRef.html#id2027)[¶](https://llvm.org/docs/LangRef.html#function-type "Link to this heading")
646+
647+Overview:
648+
649+The function type can be thought of as a function signature. It consists of a return type and a list of formal parameter types. The return type of a function type is a void type or first class type — except for [label](https://llvm.org/docs/LangRef.html#t-label) and [metadata](https://llvm.org/docs/LangRef.html#t-metadata) types.
650+
651+Syntax:
652+
653+```
654+<returntype> (<parameter list>)
655+```
656+
657+…where ‘`<parameter list>`’ is a comma-separated list of type specifiers. Optionally, the parameter list may include a type `...`, which indicates that the function takes a variable number of arguments. Variable argument functions can access their arguments with the [variable argument handling intrinsic](https://llvm.org/docs/LangRef.html#int-varargs) functions. ‘`<returntype>`’ is any type except [label](https://llvm.org/docs/LangRef.html#t-label) and [metadata](https://llvm.org/docs/LangRef.html#t-metadata).
658+
659+Examples:
660+
661+<table class="docutils align-default"><tbody><tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">i32</span> <span class="pre">(i32)</span></code></p></td><td><p>function taking an <code class="docutils literal notranslate"><span class="pre">i32</span></code>, returning an <code class="docutils literal notranslate"><span class="pre">i32</span></code></p></td></tr><tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">i32</span> <span class="pre">(ptr,</span> <span class="pre">...)</span></code></p></td><td><p>A vararg function that takes at least one <a class="reference internal" href="https://llvm.org/docs/LangRef.html#t-pointer"><span class="std std-ref">pointer</span></a> argument and returns an integer. This is the signature for <code class="docutils literal notranslate"><span class="pre">printf</span></code> in LLVM.</p></td></tr><tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">{i32,</span> <span class="pre">i32}</span> <span class="pre">(i32)</span></code></p></td><td><p>A function taking an <code class="docutils literal notranslate"><span class="pre">i32</span></code>, returning a <a class="reference internal" href="https://llvm.org/docs/LangRef.html#t-struct"><span class="std std-ref">structure</span></a> containing two <code class="docutils literal notranslate"><span class="pre">i32</span></code> values</p></td></tr></tbody></table>
662+
663+### [Opaque Structure Types](https://llvm.org/docs/LangRef.html#id2028)[¶](https://llvm.org/docs/LangRef.html#opaque-structure-types "Link to this heading")
664+
665+Overview:
666+
667+Opaque structure types are used to represent structure types that do not have a body specified. This corresponds (for example) to the C notion of a forward declared structure. They can be named (`%X`) or unnamed (`%52`).
668+
669+It is not possible to create SSA values with an opaque structure type. In practice, this largely limits their use to the value type of external globals.
670+
671+Syntax:
672+
673+```
674+%X = type opaque
675+%52 = type opaque
676+
677+@g = external global %X
678+```
679+
680+### [First Class Types](https://llvm.org/docs/LangRef.html#id2029)[¶](https://llvm.org/docs/LangRef.html#first-class-types "Link to this heading")
681+
682+The [first class](https://llvm.org/docs/LangRef.html#t-firstclass) types are perhaps the most important. Values of these types are the only ones which can be produced by instructions.
683+
684+#### [Single Value Types](https://llvm.org/docs/LangRef.html#id2030)[¶](https://llvm.org/docs/LangRef.html#single-value-types "Link to this heading")
685+
686+These are the types that are valid in registers from CodeGen’s perspective.
687+
688+##### Integer Type[¶](https://llvm.org/docs/LangRef.html#integer-type "Link to this heading")
689+
690+Overview:
691+
692+The integer type is a very simple type that simply specifies an arbitrary bit width for the integer type desired. Any bit width from 1 bit to 2<sup>23</sup> (about 8 million) can be specified.
693+
694+Syntax:
695+
696+```
697+iN
698+```
699+
700+The number of bits the integer will occupy is specified by the `N` value.
701+
702+###### Examples:[¶](https://llvm.org/docs/LangRef.html#examples "Link to this heading")
703+
704+<table class="docutils align-default"><tbody><tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">i1</span></code></p></td><td><p>a single-bit integer.</p></td></tr><tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">i32</span></code></p></td><td><p>a 32-bit integer.</p></td></tr><tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">i1942652</span></code></p></td><td><p>a really big integer of over 1 million bits.</p></td></tr></tbody></table>
705+
706+##### Floating-Point Types[¶](https://llvm.org/docs/LangRef.html#floating-point-types "Link to this heading")
707+
708+| Type | Description |
709+| --- | --- |
710+| `half` | 16-bit floating-point value (IEEE-754 binary16) |
711+| `bfloat` | 16-bit “brain” floating-point value (7-bit significand). Provides the same number of exponent bits as `float`, so that it matches its dynamic range, but with greatly reduced precision. Used in Intel’s AVX-512 BF16 extensions and Arm’s ARMv8.6-A extensions, among others. |
712+| `float` | 32-bit floating-point value (IEEE-754 binary32) |
713+| `double` | 64-bit floating-point value (IEEE-754 binary64) |
714+| `fp128` | 128-bit floating-point value (IEEE-754 binary128) |
715+| `x86_fp80` | 80-bit floating-point value (X87) |
716+| `ppc_fp128` | 128-bit floating-point value (two 64-bits) |
780+
781+##### X86\_amx Type[¶](https://llvm.org/docs/LangRef.html#x86-amx-type "Link to this heading")
782+
783+Overview:
784+
785+The x86\_amx type represents a value held in an AMX tile register on an x86 machine. The operations allowed on it are quite limited. Only a few intrinsics are allowed: stride load and store, zero and dot product. No instruction is allowed for this type. There are no arguments, arrays, pointers, vectors or constants of this type.
786+
787+Syntax:
788+
789+```
790+x86_amx
791+```
792+
793+##### Pointer Type[¶](https://llvm.org/docs/LangRef.html#pointer-type "Link to this heading")
794+
795+Overview:
796+
797+The pointer type `ptr` is used to specify memory locations. Pointers are commonly used to reference objects in memory.
798+
799+Pointer types may have an optional address space attribute defining the numbered address space where the pointed-to object resides. For example, `ptr addrspace(5)` is a pointer to address space 5. In addition to integer constants, `addrspace` can also reference one of the address spaces defined in the [datalayout string](https://llvm.org/docs/LangRef.html#langref-datalayout). `addrspace("A")` will use the alloca address space, `addrspace("G")` the default globals address space and `addrspace("P")` the program address space.
800+
801+The representation of pointers can be different for each address space and does not necessarily need to be a plain integer address (e.g. for [non-integral pointers](https://llvm.org/docs/LangRef.html#nointptrtype)). In addition to a representation bits size, pointers in each address space also have an index size which defines the bitwidth of indexing operations as well as the size of integer addresses in this address space. For example, CHERI capabilities are twice the size of the underlying addresses to accommodate for additional metadata such as bounds and permissions: on a 32-bit system the bitwidth of the pointer representation size is 64, but the underlying address width remains 32 bits.
802+
803+The default address space is number zero.
804+
805+The semantics of non-zero address spaces are target-specific. Memory access through a non-dereferenceable pointer is undefined behavior in any address space. Pointers with the bit-value 0 are only assumed to be non-dereferenceable in address space 0, unless the function is marked with the `null_pointer_is_valid` attribute. However, _volatile_ access to any non-dereferenceable address may have defined behavior (according to the target), and in this case the attribute is not needed even for address 0.
806+
807+If an object can be proven accessible through a pointer with a different address space, the access may be modified to use that address space. Exceptions apply if the operation is `volatile`.
808+
809+Prior to LLVM 15, pointer types also specified a pointee type, such as `i8*`, `[4 x i32]*` or `i32 (i32*)*`. In LLVM 15, such “typed pointers” are still supported under non-default options. See the [opaque pointers document](https://llvm.org/docs/OpaquePointers.html) for more information.
810+
811+##### Target Extension Type[¶](https://llvm.org/docs/LangRef.html#target-extension-type "Link to this heading")
812+
813+Overview:
814+
815+Target extension types represent types that must be preserved through optimization, but are otherwise generally opaque to the compiler. They may be used as function parameters or arguments, and in [phi](https://llvm.org/docs/LangRef.html#i-phi) or [select](https://llvm.org/docs/LangRef.html#i-select) instructions. Some types may be also used in [alloca](https://llvm.org/docs/LangRef.html#i-alloca) instructions or as global values, and correspondingly it is legal to use [load](https://llvm.org/docs/LangRef.html#i-load) and [store](https://llvm.org/docs/LangRef.html#i-store) instructions on them. Full semantics for these types are defined by the target.
816+
817+The only constants that target extension types may have are `zeroinitializer`, `undef`, and `poison`. Other possible values for target extension types may arise from target-specific intrinsics and functions.
818+
819+These types cannot be converted to other types. As such, it is not legal to use them in [bitcast](https://llvm.org/docs/LangRef.html#i-bitcast) instructions (as a source or target type), nor is it legal to use them in [ptrtoint](https://llvm.org/docs/LangRef.html#i-ptrtoint) or [inttoptr](https://llvm.org/docs/LangRef.html#i-inttoptr) instructions. Similarly, they are not legal to use in an [icmp](https://llvm.org/docs/LangRef.html#i-icmp) instruction.
820+
821+Target extension types have a name and optional type or integer parameters. The meanings of name and parameters are defined by the target. When being defined in LLVM IR, all of the type parameters must precede all of the integer parameters.
822+
823+Specific target extension types are registered with LLVM as having specific properties. These properties can be used to restrict the type from appearing in certain contexts, such as being the type of a global variable or having a `zeroinitializer` constant be valid. A complete list of type properties may be found in the documentation for `llvm::TargetExtType::Property` ([doxygen](https://llvm.org/doxygen/classllvm_1_1TargetExtType.html)).
824+
825+Syntax:
826+
827+```
828+target("label")
829+target("label", void)
830+target("label", void, i32)
831+target("label", 0, 1, 2)
832+target("label", void, i32, 0, 1, 2)
833+```
834+
835+##### Vector Type[¶](https://llvm.org/docs/LangRef.html#vector-type "Link to this heading")
836+
837+Overview:
838+
839+A vector type is a simple derived type that represents a vector of elements. Vector types are used when multiple primitive data are operated in parallel using a single instruction (SIMD). A vector type requires a size (number of elements), an underlying primitive data type, and a scalable property to represent vectors where the exact hardware vector length is unknown at compile time. Vector types are considered [first class](https://llvm.org/docs/LangRef.html#t-firstclass).
840+
841+Memory Layout:
842+
843+In general vector elements are laid out in memory in the same way as [array types](https://llvm.org/docs/LangRef.html#t-array). Such an analogy works fine as long as the vector elements are byte sized. However, when the elements of the vector aren’t byte sized it gets a bit more complicated. One way to describe the layout is by describing what happens when a vector such as `<N x iM>` is bitcast to an integer type with `N*M` bits, and then following the rules for storing such an integer to memory.
844+
845+A bitcast from a vector type to a scalar integer type will see the elements being packed together (without padding). The order in which elements are inserted in the integer depends on endianness. For little endian element zero is put in the least significant bits of the integer, and for big endian element zero is put in the most significant bits.
846+
847+Using a vector such as `<i4 1, i4 2, i4 3, i4 5>` as an example, together with the analogy that we can replace a vector store by a bitcast followed by an integer store, we get this for big endian:
848+
849+```
850+%val = bitcast <4 x i4> <i4 1, i4 2, i4 3, i4 5> to i16
851+
852+; Bitcasting from a vector to an integral type can be seen as
853+; concatenating the values:
854+; %val now has the hexadecimal value 0x1235.
855+
856+store i16 %val, ptr %ptr
857+
858+; In memory the content will be (8-bit addressing):
859+;
860+; [%ptr + 0]: 00010010 (0x12)
861+; [%ptr + 1]: 00110101 (0x35)
862+```
863+
864+The same example for little endian:
865+
866+```
867+%val = bitcast <4 x i4> <i4 1, i4 2, i4 3, i4 5> to i16
868+
869+; Bitcasting from a vector to an integral type can be seen as
870+; concatenating the values:
871+; %val now has the hexadecimal value 0x5321.
872+
873+store i16 %val, ptr %ptr
874+
875+; In memory the content will be (8-bit addressing):
876+;
877+; [%ptr + 0]: 00100001 (0x21)
878+; [%ptr + 1]: 01010011 (0x53)
879+```
880+
881+When `<N*M>` isn’t evenly divisible by the byte size the exact memory layout is unspecified (just like it is for an integral type of the same size). This is because different targets could put the padding at different positions when the type size is smaller than the type’s store size.
882+
883+Syntax:
884+
885+```
886+< <# elements> x <elementtype> > ; Fixed-length vector
887+< vscale x <# elements> x <elementtype> > ; Scalable vector
888+```
889+
890+The number of elements is a constant integer value larger than 0; elementtype may be any integer, floating-point, pointer type, or a sized target extension type that has the `CanBeVectorElement` property. Vectors of size zero are not allowed. For scalable vectors, the total number of elements is a constant multiple (called vscale) of the specified number of elements; vscale is a positive integer that is unknown at compile time and the same hardware-dependent constant for all scalable vectors at run time. The size of a specific scalable vector type is thus constant within IR, even if the exact size in bytes cannot be determined until run time.
891+
892+Examples:
893+
894+<table class="docutils align-default"><tbody><tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">&lt;4</span> <span class="pre">x</span> <span class="pre">i32&gt;</span></code></p></td><td><p>Vector of 4 32-bit integer values.</p></td></tr><tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">&lt;8</span> <span class="pre">x</span> <span class="pre">float&gt;</span></code></p></td><td><p>Vector of 8 32-bit floating-point values.</p></td></tr><tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">&lt;2</span> <span class="pre">x</span> <span class="pre">i64&gt;</span></code></p></td><td><p>Vector of 2 64-bit integer values.</p></td></tr><tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">&lt;4</span> <span class="pre">x</span> <span class="pre">ptr&gt;</span></code></p></td><td><p>Vector of 4 pointers</p></td></tr><tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">&lt;vscale</span> <span class="pre">x</span> <span class="pre">4</span> <span class="pre">x</span> <span class="pre">i32&gt;</span></code></p></td><td><p>Vector with a multiple of 4 32-bit integer values.</p></td></tr></tbody></table>
895+
896+#### [Label Type](https://llvm.org/docs/LangRef.html#id2031)[¶](https://llvm.org/docs/LangRef.html#label-type "Link to this heading")
897+
898+Overview:
899+
900+The label type represents code labels.
901+
902+Syntax:
903+
904+```
905+label
906+```
907+
908+#### [Token Type](https://llvm.org/docs/LangRef.html#id2032)[¶](https://llvm.org/docs/LangRef.html#token-type "Link to this heading")
909+
910+Overview:
911+
912+The token type is used when a value is associated with an instruction but all uses of the value must not attempt to introspect or obscure it. As such, it is not appropriate to have a [phi](https://llvm.org/docs/LangRef.html#i-phi) or [select](https://llvm.org/docs/LangRef.html#i-select) of type token.
913+
914+Syntax:
915+
916+```
917+token
918+```
919+
920+#### [Metadata Type](https://llvm.org/docs/LangRef.html#id2033)[¶](https://llvm.org/docs/LangRef.html#metadata-type "Link to this heading")
921+
922+Overview:
923+
924+The metadata type represents embedded metadata. No derived types may be created from metadata except for [function](https://llvm.org/docs/LangRef.html#t-function) arguments.
925+
926+Syntax:
927+
928+```
929+metadata
930+```
931+
932+#### [Aggregate Types](https://llvm.org/docs/LangRef.html#id2034)[¶](https://llvm.org/docs/LangRef.html#aggregate-types "Link to this heading")
933+
934+Aggregate Types are a subset of derived types that can contain multiple member types. [Arrays](https://llvm.org/docs/LangRef.html#t-array) and [structs](https://llvm.org/docs/LangRef.html#t-struct) are aggregate types. [Vectors](https://llvm.org/docs/LangRef.html#t-vector) are not considered to be aggregate types.
935+
936+##### Array Type[¶](https://llvm.org/docs/LangRef.html#array-type "Link to this heading")
937+
938+Overview:
939+
940+The array type is a very simple derived type that arranges elements sequentially in memory. The array type requires a size (number of elements) and an underlying data type.
941+
942+Syntax:
943+
944+```
945+[<# elements> x <elementtype>]
946+```
947+
948+The number of elements is a constant integer value; `elementtype` may be any type with a size.
949+
950+Examples:
951+
952+<table class="docutils align-default"><tbody><tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">[40</span> <span class="pre">x</span> <span class="pre">i32]</span></code></p></td><td><p>Array of 40 32-bit integer values.</p></td></tr><tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">[41</span> <span class="pre">x</span> <span class="pre">i32]</span></code></p></td><td><p>Array of 41 32-bit integer values.</p></td></tr><tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">[4</span> <span class="pre">x</span> <span class="pre">i8]</span></code></p></td><td><p>Array of 4 8-bit integer values.</p></td></tr></tbody></table>
953+
954+Here are some examples of multidimensional arrays:
955+
956+<table class="docutils align-default"><tbody><tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">[3</span> <span class="pre">x</span> <span class="pre">[4</span> <span class="pre">x</span> <span class="pre">i32]]</span></code></p></td><td><p>3x4 array of 32-bit integer values.</p></td></tr><tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">[12</span> <span class="pre">x</span> <span class="pre">[10</span> <span class="pre">x</span> <span class="pre">float]]</span></code></p></td><td><p>12x10 array of single precision floating-point values.</p></td></tr><tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">[2</span> <span class="pre">x</span> <span class="pre">[3</span> <span class="pre">x</span> <span class="pre">[4</span> <span class="pre">x</span> <span class="pre">i16]]]</span></code></p></td><td><p>2x3x4 array of 16-bit integer values.</p></td></tr></tbody></table>
957+
958+There is no restriction on indexing beyond the end of the array implied by a static type (though there are restrictions on indexing beyond the bounds of an [allocated object](https://llvm.org/docs/LangRef.html#allocatedobjects) in some cases). This means that single-dimension ‘variable sized array’ addressing can be implemented in LLVM with a zero length array type. An implementation of ‘pascal style arrays’ in LLVM could use the type “`{ i32, [0 x float]}`”, for example.
959+
960+##### Structure Type[¶](https://llvm.org/docs/LangRef.html#structure-type "Link to this heading")
961+
962+Overview:
963+
964+The structure type is used to represent a collection of data members together in memory. The elements of a structure may be any type that has a size.
965+
966+Structures in memory are accessed using ‘`load`’ and ‘`store`’ by getting a pointer to a field with the ‘`getelementptr`’ instruction. Structures in registers are accessed using the ‘`extractvalue`’ and ‘`insertvalue`’ instructions.
967+
968+Structures may optionally be “packed” structures, which indicate that the alignment of the struct is one byte, and that there is no padding between the elements. In non-packed structs, padding between field types is inserted as defined by the DataLayout string in the module, which is required to match what the underlying code generator expects.
969+
970+Structures can either be “literal” or “identified”. A literal structure is defined inline with other types (e.g. `[2 x {i32, i32}]`) whereas identified types are always defined at the top level with a name. Literal types are uniqued by their contents and can never be recursive or opaque since there is no way to write one. Identified types can be opaqued and are never uniqued. Identified types must not be recursive.
971+
972+Syntax:
973+
974+```
975+%T1 = type { <type list> } ; Identified normal struct type
976+%T2 = type <{ <type list> }> ; Identified packed struct type
977+```
978+
979+Examples:
980+
981+<table class="docutils align-default"><tbody><tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">i32,</span> <span class="pre">i32,</span> <span class="pre">i32</span> <span class="pre">}</span></code></p></td><td><p>A triple of three <code class="docutils literal notranslate"><span class="pre">i32</span></code> values (this is a “homogeneous” struct as all element types are the same)</p></td></tr><tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">float,</span> <span class="pre">ptr</span> <span class="pre">}</span></code></p></td><td><p>A pair, where the first element is a <code class="docutils literal notranslate"><span class="pre">float</span></code> and the second element is a <a class="reference internal" href="https://llvm.org/docs/LangRef.html#t-pointer"><span class="std std-ref">pointer</span></a>.</p></td></tr><tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">&lt;{</span> <span class="pre">i8,</span> <span class="pre">i32</span> <span class="pre">}&gt;</span></code></p></td><td><p>A packed struct known to be 5 bytes in size.</p></td></tr></tbody></table>
982+
983+## [Constants](https://llvm.org/docs/LangRef.html#id2035)[¶](https://llvm.org/docs/LangRef.html#constants "Link to this heading")
984+
985+LLVM has several different basic types of constants. This section describes them all and their syntax.
986+
987+### [Simple Constants](https://llvm.org/docs/LangRef.html#id2036)[¶](https://llvm.org/docs/LangRef.html#simple-constants "Link to this heading")
988+
989+**Boolean constants**
990+
991+The two strings ‘`true`’ and ‘`false`’ are both valid constants of the `i1` type.
992+
993+**Integer constants**
994+
995+Standard integers (such as ‘4’) are constants of the [integer](https://llvm.org/docs/LangRef.html#t-integer) type. They can be either decimal or hexadecimal. Decimal integers can be prefixed with - to represent negative integers, e.g. ‘`-1234`’. Hexadecimal integers must be prefixed with either u or s to indicate whether they are unsigned or signed respectively, e.g. ‘`u0x8000`’ gives 32768, whilst ‘`s0x8000`’ gives -32768.
996+
997+Note that hexadecimal integers are sign extended from the number of active bits, i.e., the bit width minus the number of leading zeros. So ‘`s0x0001`’ of type ‘`i16`’ will be -1, not 1.
998+
999+**Floating-point constants**
1000+
1001+Floating-point constants use standard decimal notation (e.g. 123.421), exponential notation (e.g. 1.23421e+2), or a more precise hexadecimal notation (see below). The assembler requires the exact decimal value of a floating-point constant. For example, the assembler accepts 1.25 but rejects 1.3 because 1.3 is a repeating decimal in binary. Floating-point constants must have a [floating-point](https://llvm.org/docs/LangRef.html#t-floating) type.
1002+
1003+**Null pointer constants**
1004+
1005+The identifier ‘`null`’ is recognized as a null pointer constant and must be of [pointer type](https://llvm.org/docs/LangRef.html#t-pointer).
1006+
1007+**Token constants**
1008+
1009+The identifier ‘`none`’ is recognized as an empty token constant and must be of [token type](https://llvm.org/docs/LangRef.html#t-token).
1010+
1011+The one non-intuitive notation for constants is the hexadecimal form of floating-point constants. For example, the form ‘`double 0x432ff973cafa8000`’ is equivalent to (but harder to read than) ‘`double 4.5e+15`’. The only time hexadecimal floating-point constants are required (and the only time that they are generated by the disassembler) is when a floating-point constant must be emitted but it cannot be represented as a decimal floating-point number in a reasonable number of digits. For example, NaN’s, infinities, and other special values are represented in their IEEE hexadecimal format so that assembly and disassembly do not cause any bits to change in the constants.
1012+
1013+When using the hexadecimal form, constants of types bfloat, half, float, and double are represented using the 16-digit form shown above (which matches the IEEE 754 representation for double); bfloat, half and float values must, however, be exactly representable as bfloat, IEEE 754 half, and IEEE 754 single precision respectively. Hexadecimal format is always used for long double, and there are three forms of long double. The 80-bit format used by x86 is represented as `0xK` followed by 20 hexadecimal digits. The 128-bit format used by PowerPC (two adjacent doubles) is represented by `0xM` followed by 32 hexadecimal digits. The IEEE 128-bit format is represented by `0xL` followed by 32 hexadecimal digits. Long doubles will only work if they match the long double format on your target. The IEEE 16-bit format (half precision) is represented by `0xH` followed by 4 hexadecimal digits. The bfloat 16-bit format is represented by `0xR` followed by 4 hexadecimal digits. All hexadecimal formats are big-endian (sign bit at the left).
1014+
1015+There are no constants of type x86\_amx.
1016+
1017+### [Complex Constants](https://llvm.org/docs/LangRef.html#id2037)[¶](https://llvm.org/docs/LangRef.html#complex-constants "Link to this heading")
1018+
1019+Complex constants are a (potentially recursive) combination of simple constants and smaller complex constants.
1020+
1021+**Structure constants**
1022+
1023+Structure constants are represented with notation similar to structure type definitions (a comma separated list of elements, surrounded by braces (`{}`)). For example: “`{ i32 4, float 17.0, ptr @G }`”, where “`@G`” is declared as “`@G = external global i32`”. Structure constants must have [structure type](https://llvm.org/docs/LangRef.html#t-struct), and the number and types of elements must match those specified by the type.
1024+
1025+**Array constants**
1026+
1027+Array constants are represented with notation similar to array type definitions (a comma separated list of elements, surrounded by square brackets (`[]`)). For example: “`[ i32 42, i32 11, i32 74 ]`”. Array constants must have [array type](https://llvm.org/docs/LangRef.html#t-array), and the number and types of elements must match those specified by the type. As a special case, character array constants may also be represented as a double-quoted string using the `c` prefix. For example: “`c"Hello World\0A\00"`”.
1028+
1029+**Vector constants**
1030+
1031+Vector constants are represented with notation similar to vector type definitions (a comma separated list of elements, surrounded by less-than/greater-than’s (`<>`)). For example: “`< i32 42, i32 11, i32 74, i32 100 >`”. Vector constants must have [vector type](https://llvm.org/docs/LangRef.html#t-vector), and the number and types of elements must match those specified by the type.
1032+
1033+When creating a vector whose elements have the same constant value, the preferred syntax is `splat (<Ty> Val)`. For example: “`splat (i32 11)`”. These vector constants must have [vector type](https://llvm.org/docs/LangRef.html#t-vector) with an element type that matches the `splat` operand.
1034+
1035+**Zero initialization**
1036+
1037+The string ‘`zeroinitializer`’ can be used to zero initialize a value of _any_ type, including scalar and [aggregate](https://llvm.org/docs/LangRef.html#t-aggregate) types. This is often used to avoid having to print large zero initializers (e.g. for large arrays) and is always exactly equivalent to using explicit zero initializers.
1038+
1039+**Metadata node**
1040+
1041+A metadata node is a constant tuple without types. For example: “`!{!0, !{!2, !0}, !"test"}`”. Metadata can reference constant values, for example: “`!{!0, i32 0, ptr @global, ptr @function, !"str"}`”. Unlike other typed constants that are meant to be interpreted as part of the instruction stream, metadata is a place to attach additional information such as debug info.
1042+
1043+### [Global Variable and Function Addresses](https://llvm.org/docs/LangRef.html#id2038)[¶](https://llvm.org/docs/LangRef.html#global-variable-and-function-addresses "Link to this heading")
1044+
1045+The addresses of [global variables](https://llvm.org/docs/LangRef.html#globalvars) and [functions](https://llvm.org/docs/LangRef.html#functionstructure) are always implicitly valid (link-time) constants. These constants are explicitly referenced when the [identifier for the global](https://llvm.org/docs/LangRef.html#identifiers) is used and always have [pointer](https://llvm.org/docs/LangRef.html#t-pointer) type. For example, the following is a legal LLVM file:
1046+
1047+```
1048+@X = global i32 17
1049+@Y = global i32 42
1050+@Z = global [2 x ptr] [ ptr @X, ptr @Y ]
1051+```
1052+
1053+### [Undefined Values](https://llvm.org/docs/LangRef.html#id2039)[¶](https://llvm.org/docs/LangRef.html#undefined-values "Link to this heading")
1054+
1055+The string ‘`undef`’ can be used anywhere a constant is expected, and indicates that the user of the value may receive an unspecified bit-pattern. Undefined values may be of any type (other than ‘`label`’ or ‘`void`’) and be used anywhere a constant is permitted.
1056+
1057+**Note:**
1058+
1059+A ‘`poison`’ value (described in the next section) should be used instead of ‘`undef`’ whenever possible. Poison values are stronger than undef, and enable more optimizations. Just the existence of ‘`undef`’ blocks certain optimizations (see the examples below).
1060+
1061+Undefined values are useful because they indicate to the compiler that the program is well defined no matter what value is used. This gives the compiler more freedom to optimize. Here are some examples of (potentially surprising) transformations that are valid (in pseudo IR):
1062+
1063+```
1064+  %A = add %X, undef
1065+  %B = sub %X, undef
1066+  %C = xor %X, undef
1067+Safe:
1068+  %A = undef
1069+  %B = undef
1070+  %C = undef
1071+```
1072+
1073+This is safe because all of the output bits are affected by the undef bits. Any output bit can have a zero or one depending on the input bits.
1074+
1075+```
1076+  %A = or %X, undef
1077+  %B = and %X, undef
1078+Safe:
1079+  %A = -1
1080+  %B = 0
1081+Safe:
1082+  %A = %X  ;; By choosing undef as 0
1083+  %B = %X  ;; By choosing undef as -1
1084+Unsafe:
1085+  %A = undef
1086+  %B = undef
1087+```
1088+
1089+These logical operations have bits that are not always affected by the input. For example, if `%X` has a zero bit, then the output of the ‘`and`’ operation will always be a zero for that bit, no matter what the corresponding bit from the ‘`undef`’ is. As such, it is unsafe to optimize or assume that the result of the ‘`and`’ is ‘`undef`’. However, it is safe to assume that all bits of the ‘`undef`’ could be 0, and optimize the ‘`and`’ to 0. Likewise, it is safe to assume that all the bits of the ‘`undef`’ operand to the ‘`or`’ could be set, allowing the ‘`or`’ to be folded to -1.
1090+
1091+```
1092+  %A = select undef, %X, %Y
1093+  %B = select undef, 42, %Y
1094+  %C = select %X, %Y, undef
1095+Safe:
1096+  %A = %X     (or %Y)
1097+  %B = 42     (or %Y)
1098+  %C = %Y     (if %Y is provably not poison; unsafe otherwise)
1099+Unsafe:
1100+  %A = undef
1101+  %B = undef
1102+  %C = undef
1103+```
1104+
1105+This set of examples shows that undefined ‘`select`’ conditions can go _either way_, but they have to come from one of the two operands. In the `%A` example, if `%X` and `%Y` were both known to have a clear low bit, then `%A` would have to have a cleared low bit. However, in the `%C` example, the optimizer is allowed to assume that the ‘`undef`’ operand could be the same as `%Y` if `%Y` is provably not ‘`poison`’, allowing the whole ‘`select`’ to be eliminated. This is because ‘`poison`’ is stronger than ‘`undef`’.
1106+
1107+```
1108+  %A = xor undef, undef
1109+
1110+  %B = undef
1111+  %C = xor %B, %B
1112+
1113+  %D = undef
1114+  %E = icmp slt %D, 4
1115+  %F = icmp sge %D, 4
1116+
1117+Safe:
1118+  %A = undef
1119+  %B = undef
1120+  %C = undef
1121+  %D = undef
1122+  %E = undef
1123+  %F = undef
1124+```
1125+
1126+This example points out that two ‘`undef`’ operands are not necessarily the same. This can be surprising to people who assume that “`X^X`” is always zero, even if `X` is undefined (though it matches C semantics). This isn’t true for a number of reasons, but the short answer is that an ‘`undef`’ “variable” can arbitrarily change its value over its “live range”. This is true because the variable doesn’t actually _have a live range_. Instead, the value is logically read from arbitrary registers that happen to be around when needed, so the value is not necessarily consistent over time. In fact, `%A` and `%C` need to have the same semantics or the core LLVM “replace all uses with” concept would not hold.
1127+
1128+To ensure all uses of a given register observe the same value (even if ‘`undef`’), the [freeze instruction](https://llvm.org/docs/LangRef.html#i-freeze) can be used.
1129+
1130+```
1131+  %A = sdiv undef, %X
1132+  %B = sdiv %X, undef
1133+Safe:
1134+  %A = 0
1135+b: unreachable
1136+```
1137+
1138+These examples show the crucial difference between an _undefined value_ and _undefined behavior_. An undefined value (like ‘`undef`’) is allowed to have an arbitrary bit-pattern. This means that the `%A` operation can be constant folded to ‘`0`’, because the ‘`undef`’ could be zero, and zero divided by any value is zero. However, in the second example, we can make a more aggressive assumption: because the `undef` is allowed to be an arbitrary value, we are allowed to assume that it could be zero. Since a divide by zero has _undefined behavior_, we are allowed to assume that the operation does not execute at all. This allows us to delete the divide and all code after it. Because the undefined operation “can’t happen”, the optimizer can assume that it occurs in dead code.
1139+
1140+```
1141+a: store undef -> %X
1142+b: store %X -> undef
1143+Safe:
1144+a: <deleted> (if the stored value in %X is provably not poison)
1145+b: unreachable
1146+```
1147+
1148+A store _of_ an undefined value can be assumed to not have any effect; we can assume that the value is overwritten with bits that happen to match what was already there. This argument is only valid if the stored value is provably not `poison`. However, a store _to_ an undefined location could clobber arbitrary memory, therefore, it has undefined behavior.
1149+
1150+Branching on an undefined value is undefined behavior. This explains optimizations that depend on branch conditions to construct predicates, such as Correlated Value Propagation and Global Value Numbering. In the case of a switch instruction, the branch condition should be frozen, otherwise it is undefined behavior.
1151+
1152+```
1153+Unsafe:
1154+  br undef, BB1, BB2 ; UB
1155+
1156+  %X = and i32 undef, 255
1157+  switch %X, label %ret [ .. ] ; UB
1158+
1159+  store undef, ptr %ptr
1160+  %X = load ptr %ptr ; %X is undef
1161+  switch i8 %X, label %ret [ .. ] ; UB
1162+
1163+Safe:
1164+  %X = or i8 undef, 255 ; always 255
1165+  switch i8 %X, label %ret [ .. ] ; Well-defined
1166+
1167+  %X = freeze i1 undef
1168+  br %X, BB1, BB2 ; Well-defined (non-deterministic jump)
1169+```
1170+
1171+### [Poison Values](https://llvm.org/docs/LangRef.html#id2040)[¶](https://llvm.org/docs/LangRef.html#poison-values "Link to this heading")
1172+
1173+A poison value is a result of an erroneous operation. In order to facilitate speculative execution, many instructions do not invoke immediate undefined behavior when provided with illegal operands, and return a poison value instead. The string ‘`poison`’ can be used anywhere a constant is expected, and operations such as [add](https://llvm.org/docs/LangRef.html#i-add) with the `nsw` flag can produce a poison value.
1174+
1175+Most instructions return ‘`poison`’ when one of their arguments is ‘`poison`’. A notable exception is the [select instruction](https://llvm.org/docs/LangRef.html#i-select). Propagation of poison can be stopped with the [freeze instruction](https://llvm.org/docs/LangRef.html#i-freeze).
1176+
1177+It is correct to replace a poison value with an [undef value](https://llvm.org/docs/LangRef.html#undefvalues) or any value of the type.
1178+
1179+This means that immediate undefined behavior occurs if a poison value is used as an instruction operand that has any values that trigger undefined behavior. Notably this includes (but is not limited to):
1180+
1181+- The pointer operand of a [load](https://llvm.org/docs/LangRef.html#i-load), [store](https://llvm.org/docs/LangRef.html#i-store) or any other pointer dereferencing instruction (independent of address space).
1182+
1183+- The divisor operand of a `udiv`, `sdiv`, `urem` or `srem` instruction.
1184+
1185+- The condition operand of a [br](https://llvm.org/docs/LangRef.html#i-br) instruction.
1186+
1187+- The callee operand of a [call](https://llvm.org/docs/LangRef.html#i-call) or [invoke](https://llvm.org/docs/LangRef.html#i-invoke) instruction.
1188+
1189+- The parameter operand of a [call](https://llvm.org/docs/LangRef.html#i-call) or [invoke](https://llvm.org/docs/LangRef.html#i-invoke) instruction, when the function or invoking call site has a `noundef` attribute in the corresponding position.
1190+
1191+- The operand of a [ret](https://llvm.org/docs/LangRef.html#i-ret) instruction if the function or invoking call site has a `noundef` attribute in the return value position.
1192+
1193+
1194+Here are some examples:
1195+
1196+```
1197+<span></span><span class="nl">entry:</span>
1198+<span class="w"> </span><span class="nv">%poison</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">sub</span><span class="w"> </span><span class="k">nuw</span><span class="w"> </span><span class="kt">i32</span><span class="w"> </span><span class="m">0</span><span class="p">,</span><span class="w"> </span><span class="m">1</span><span class="w"> </span><span class="c">; Results in a poison value.</span>
1199+<span class="w"> </span><span class="nv">%poison2</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">sub</span><span class="w"> </span><span class="kt">i32</span><span class="w"> </span><span class="k">poison</span><span class="p">,</span><span class="w"> </span><span class="m">1</span><span class="w"> </span><span class="c">; Also results in a poison value.</span>
1200+<span class="w"> </span><span class="nv">%still_poison</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">and</span><span class="w"> </span><span class="kt">i32</span><span class="w"> </span><span class="nv">%poison</span><span class="p">,</span><span class="w"> </span><span class="m">0</span><span class="w"> </span><span class="c">; 0, but also poison.</span>
1201+<span class="w"> </span><span class="nv">%poison_yet_again</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">getelementptr</span><span class="w"> </span><span class="kt">i32</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@h</span><span class="p">,</span><span class="w"> </span><span class="kt">i32</span><span class="w"> </span><span class="nv">%still_poison</span>
1202+<span class="w"> </span><span class="k">store</span><span class="w"> </span><span class="kt">i32</span><span class="w"> </span><span class="m">0</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="nv">%poison_yet_again</span><span class="w"> </span><span class="c">; Undefined behavior due to</span>
1203+<span class="w"> </span><span class="c">; store to poison.</span>
1204+
1205+<span class="w"> </span><span class="k">store</span><span class="w"> </span><span class="kt">i32</span><span class="w"> </span><span class="nv">%poison</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@g</span><span class="w"> </span><span class="c">; Poison value stored to memory.</span>
1206+<span class="w"> </span><span class="nv">%poison3</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">load</span><span class="w"> </span><span class="kt">i32</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@g</span><span class="w"> </span><span class="c">; Poison value loaded back from memory.</span>
1207+
1208+<span class="w"> </span><span class="nv">%poison4</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">load</span><span class="w"> </span><span class="kt">i16</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@g</span><span class="w"> </span><span class="c">; Returns a poison value.</span>
1209+<span class="w"> </span><span class="nv">%poison5</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">load</span><span class="w"> </span><span class="kt">i64</span><span class="p">,</span><span class="w"> </span><span class="kt">ptr</span><span class="w"> </span><span class="vg">@g</span><span class="w"> </span><span class="c">; Returns a poison value.</span>
1210+
1211+<span class="w"> </span><span class="nv">%cmp</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="k">icmp</span><span class="w"> </span><span class="k">slt</span><span class="w"> </span><span class="kt">i32</span><span class="w"> </span><span class="nv">%poison</span><span class="p">,</span><span class="w"> </span><span class="m">0</span><span class="w"> </span><span class="c">; Returns a poison value.</span>
1212+<span class="w"> </span><span class="k">br</span><span class="w"> </span><span class="kt">i1</span><span class="w"> </span><span class="nv">%cmp</span><span class="p">,</span><span class="w"> </span><span class="kt">label</span><span class="w"> </span><span class="nv">%end</span><span class="p">,</span><span class="w"> </span><span class="kt">label</span><span class="w"> </span><span class="nv">%end</span><span class="w"> </span><span class="c">; undefined behavior</span>
1213+
1214+<span class="nl">end:</span>
1215+```
1216+
1217+### [Well-Defined Values](https://llvm.org/docs/LangRef.html#id2041)[¶](https://llvm.org/docs/LangRef.html#well-defined-values "Link to this heading")
1218+
1219+Given a program execution, a value is _well defined_ if the value does not have an undef bit and is not poison in the execution. An aggregate value or vector is well defined if its elements are well defined. The padding of an aggregate isn’t considered, since it isn’t visible without storing it into memory and loading it with a different type.
1220+
1221+A constant of a [single value](https://llvm.org/docs/LangRef.html#t-single-value), non-vector type is well defined if it is neither an ‘`undef`’ constant nor a ‘`poison`’ constant. The result of a [freeze instruction](https://llvm.org/docs/LangRef.html#i-freeze) is well defined regardless of its operand.
1222+
1223+### [Addresses of Basic Blocks](https://llvm.org/docs/LangRef.html#id2042)[¶](https://llvm.org/docs/LangRef.html#addresses-of-basic-blocks "Link to this heading")
1224+
1225+`blockaddress(@function, %block)`
1226+
1227+The ‘`blockaddress`’ constant computes the address of the specified basic block in the specified function.
1228+
1229+It always has a `ptr addrspace(P)` type, where `P` is the address space of the function containing `%block` (usually `addrspace(0)`).
1230+
1231+Taking the address of the entry block is illegal.
1232+
1233+This value only has defined behavior when used as an operand to the ‘[indirectbr](https://llvm.org/docs/LangRef.html#i-indirectbr)’ instruction or for comparisons against null. Pointer equality tests between label addresses result in undefined behavior — though, again, comparison against null is ok, and no label is equal to the null pointer. This may be passed around as an opaque pointer-sized value as long as the bits are not inspected. This allows `ptrtoint` and arithmetic to be performed on these values so long as the original value is reconstituted before the `indirectbr` instruction.
1234+
1235+Finally, some targets may provide defined semantics when using the value as the operand to an inline assembly, but that is target specific.
1236+
1237+### [DSO Local Equivalent](https://llvm.org/docs/LangRef.html#id2043)[¶](https://llvm.org/docs/LangRef.html#dso-local-equivalent "Link to this heading")
1238+
1239+`dso_local_equivalent @func`
1240+
1241+A ‘`dso_local_equivalent`’ constant represents a function which is functionally equivalent to a given function, but is always defined in the current linkage unit. The resulting pointer has the same type as the underlying function. The resulting pointer is permitted, but not required, to be different from a pointer to the function, and it may have different values in different translation units.
1242+
1243+The target function may not have `extern_weak` linkage.
1244+
1245+`dso_local_equivalent` can be implemented as such:
1246+
1247+- If the function has local linkage, hidden visibility, or is `dso_local`, `dso_local_equivalent` can be implemented as simply a pointer to the function.
1248+
1249+- `dso_local_equivalent` can be implemented with a stub that tail-calls the function. Many targets support relocations that resolve at link time to either a function or a stub for it, depending on whether the function is defined within the linkage unit; LLVM will use this when available. (This is commonly called a “PLT stub”.) On other targets, the stub may need to be emitted explicitly.
1250+
1251+
1252+This can be used wherever a `dso_local` instance of a function is needed without needing to explicitly make the original function `dso_local`. An instance where this can be used is for static offset calculations between a function and some other `dso_local` symbol. This is especially useful for the Relative VTables C++ ABI, where dynamic relocations for function pointers in VTables can be replaced with static relocations for offsets between the VTable and virtual functions which may not be `dso_local`.
1253+
1254+This is currently only supported for ELF binary formats.
1255+
1256+### [No CFI](https://llvm.org/docs/LangRef.html#id2044)[¶](https://llvm.org/docs/LangRef.html#no-cfi "Link to this heading")
1257+
1258+`no_cfi @func`
1259+
1260+With [Control-Flow Integrity (CFI)](https://clang.llvm.org/docs/ControlFlowIntegrity.html), a ‘`no_cfi`’ constant represents a function reference that does not get replaced with a reference to the CFI jump table in the `LowerTypeTests` pass. These constants may be useful in low-level programs, such as operating system kernels, which need to refer to the actual function body.
1261+
1262+### [Pointer Authentication Constants](https://llvm.org/docs/LangRef.html#id2045)[¶](https://llvm.org/docs/LangRef.html#pointer-authentication-constants "Link to this heading")
1263+
1264+`ptrauth (ptr CST, i32 KEY[, i64 DISC[, ptr ADDRDISC]?]?)`
1265+
1266+A ‘`ptrauth`’ constant represents a pointer with a cryptographic authentication signature embedded into some bits, as described in the [Pointer Authentication](https://llvm.org/docs/PointerAuth.html) document.
1267+
1268+A ‘`ptrauth`’ constant is simply a constant equivalent to the `llvm.ptrauth.sign` intrinsic, potentially fed by a discriminator `llvm.ptrauth.blend` if needed.
1269+
1270+Its type is the same as the first argument. An integer constant discriminator and an address discriminator may be optionally specified. Otherwise, they have values `i64 0` and `ptr null`.
1271+
1272+If the address discriminator is `null` then the expression is equivalent to
1273+
1274+```
1275+%tmp = call i64 @llvm.ptrauth.sign(i64 ptrtoint (ptr CST to i64), i32 KEY, i64 DISC)
1276+%val = inttoptr i64 %tmp to ptr
1277+```
1278+
1279+Otherwise, the expression is equivalent to:
1280+
1281+```
1282+%tmp1 = call i64 @llvm.ptrauth.blend(i64 ptrtoint (ptr ADDRDISC to i64), i64 DISC)
1283+%tmp2 = call i64 @llvm.ptrauth.sign(i64 ptrtoint (ptr CST to i64), i32 KEY, i64 %tmp1)
1284+%val = inttoptr i64 %tmp2 to ptr
1285+```
1286+
1287+### [Constant Expressions](https://llvm.org/docs/LangRef.html#id2046)[¶](https://llvm.org/docs/LangRef.html#constant-expressions "Link to this heading")
1288+
1289+Constant expressions are used to allow expressions involving other constants to be used as constants. Constant expressions may be of any [first class](https://llvm.org/docs/LangRef.html#t-firstclass) type and may involve any LLVM operation that does not have side effects (e.g. load and call are not supported). The following is the syntax for constant expressions:
1290+
1291+`trunc (CST to TYPE)`
1292+
1293+Perform the [trunc operation](https://llvm.org/docs/LangRef.html#i-trunc) on constants.
1294+
1295+`ptrtoint (CST to TYPE)`
1296+
1297+Perform the [ptrtoint operation](https://llvm.org/docs/LangRef.html#i-ptrtoint) on constants.
1298+
1299+`ptrtoaddr (CST to TYPE)`
1300+
1301+Perform the [ptrtoaddr operation](https://llvm.org/docs/LangRef.html#i-ptrtoaddr) on constants.
1302+
1303+`inttoptr (CST to TYPE)`
1304+
1305+Perform the [inttoptr operation](https://llvm.org/docs/LangRef.html#i-inttoptr) on constants. This one is _really_ dangerous!
1306+
1307+`bitcast (CST to TYPE)`
1308+
1309+Convert a constant, CST, to another TYPE. The constraints of the operands are the same as those for the [bitcast instruction](https://llvm.org/docs/LangRef.html#i-bitcast).
1310+
1311+`addrspacecast (CST to TYPE)`
1312+
1313+Convert a constant pointer or constant vector of pointers, CST, to another TYPE in a different address space. The constraints of the operands are the same as those for the [addrspacecast instruction](https://llvm.org/docs/LangRef.html#i-addrspacecast).
1314+
1315+`getelementptr (TY, CSTPTR, IDX0, IDX1, ...)`, `getelementptr inbounds (TY, CSTPTR, IDX0, IDX1, ...)`
1316+
1317+Perform the [getelementptr operation](https://llvm.org/docs/LangRef.html#i-getelementptr) on constants. As with the [getelementptr](https://llvm.org/docs/LangRef.html#i-getelementptr) instruction, the index list may have one or more indexes, which are required to make sense for the type of “pointer to TY”. These indexes may be implicitly sign-extended or truncated to match the index size of CSTPTR’s address space.
1318+
1319+`extractelement (VAL, IDX)`
1320+
1321+Perform the [extractelement operation](https://llvm.org/docs/LangRef.html#i-extractelement) on constants.
1322+
1323+`insertelement (VAL, ELT, IDX)`
1324+
1325+Perform the [insertelement operation](https://llvm.org/docs/LangRef.html#i-insertelement) on constants.
1326+
1327+`shufflevector (VEC1, VEC2, IDXMASK)`
1328+
1329+Perform the [shufflevector operation](https://llvm.org/docs/LangRef.html#i-shufflevector) on constants.
1330+
1331+`add (LHS, RHS)`
1332+
1333+Perform an addition on constants.
1334+
1335+`sub (LHS, RHS)`
1336+
1337+Perform a subtraction on constants.
1338+
1339+`xor (LHS, RHS)`
1340+
1341+Perform a bitwise xor on constants.
1342+
1343+## [Other Values](https://llvm.org/docs/LangRef.html#id2047)[¶](https://llvm.org/docs/LangRef.html#other-values "Link to this heading")
1344+
1345+### [Inline Assembler Expressions](https://llvm.org/docs/LangRef.html#id2048)[¶](https://llvm.org/docs/LangRef.html#inline-assembler-expressions "Link to this heading")
1346+
1347+LLVM supports inline assembler expressions (as opposed to [Module-Level Inline Assembly](https://llvm.org/docs/LangRef.html#moduleasm)) through the use of a special value. This value represents the inline assembler as a template string (containing the instructions to emit), a list of operand constraints (stored as a string), a flag that indicates whether or not the inline asm expression has side effects, and a flag indicating whether the function containing the asm needs to align its stack conservatively.
1348+
1349+The template string supports argument substitution of the operands using “`$`” followed by a number, to indicate substitution of the given register/memory location, as specified by the constraint string. “`${NUM:MODIFIER}`” may also be used, where `MODIFIER` is a target-specific annotation for how to print the operand (See [Asm template argument modifiers](https://llvm.org/docs/LangRef.html#inline-asm-modifiers)).
1350+
1351+A literal “`$`” may be included by using “`$$`” in the template. To include other special characters into the output, the usual “`\XX`” escapes may be used, just as in other strings. Note that after template substitution, the resulting assembly string is parsed by LLVM’s integrated assembler unless it is disabled – even when emitting a `.s` file – and thus must contain assembly syntax known to LLVM.
1352+
1353+LLVM also supports a few more substitutions useful for writing inline assembly:
1354+
1355+- `${:uid}`: Expands to a decimal integer unique to this inline assembly blob. This substitution is useful when declaring a local label. Many standard compiler optimizations, such as inlining, may duplicate an inline asm blob. Adding a blob-unique identifier ensures that the two labels will not conflict during assembly. This is used to implement [GCC’s %= special format string](https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html).
1356+
1357+- `${:comment}`: Expands to the comment character of the current target’s assembly dialect. This is usually `#`, but many targets use other strings, such as `;`, `//`, or `!`.
1358+
1359+- `${:private}`: Expands to the assembler private label prefix. Labels with this prefix will not appear in the symbol table of the assembled object. Typically the prefix is `L`, but targets may use other strings. `.L` is relatively popular.
1360+
1361+
1362+LLVM’s support for inline asm is modeled closely on the requirements of Clang’s GCC-compatible inline-asm support. Thus, the feature-set and the constraint and modifier codes listed here are similar or identical to those in GCC’s inline asm support. However, to be clear, the syntax of the template and constraint strings described here is _not_ the same as the syntax accepted by GCC and Clang, and, while most constraint letters are passed through as-is by Clang, some get translated to other codes when converting from the C source to the LLVM assembly.
1363+
1364+An example inline assembler expression is:
1365+
1366+```
1367+i32 (i32) asm "bswap $0", "=r,r"
1368+```
1369+
1370+Inline assembler expressions may **only** be used as the callee operand of a [call](https://llvm.org/docs/LangRef.html#i-call) or an [invoke](https://llvm.org/docs/LangRef.html#i-invoke) instruction. Thus, typically we have:
1371+
1372+```
1373+%X = call i32 asm "bswap $0", "=r,r"(i32 %Y)
1374+```
1375+
1376+Inline asms with side effects not visible in the constraint list must be marked as having side effects. This is done through the use of the ‘`sideeffect`’ keyword, like so:
1377+
1378+```
1379+call void asm sideeffect "eieio", ""()
1380+```
1381+
1382+In some cases inline asms will contain code that will not work unless the stack is aligned in some way, such as calls or SSE instructions on x86, yet will not contain code that does that alignment within the asm. The compiler should make conservative assumptions about what the asm might contain and should generate its usual stack alignment code in the prologue if the ‘`alignstack`’ keyword is present:
1383+
1384+```
1385+call void asm alignstack "eieio", ""()
1386+```
1387+
1388+Inline asms also support using non-standard assembly dialects. The assumed dialect is ATT. When the ‘`inteldialect`’ keyword is present, the inline asm is using the Intel dialect. Currently, ATT and Intel are the only supported dialects. An example is:
1389+
1390+```
1391+call void asm inteldialect "eieio", ""()
1392+```
1393+
1394+In the case that the inline asm might unwind the stack, the ‘`unwind`’ keyword must be used, so that the compiler emits unwinding information:
1395+
1396+```
1397+call void asm unwind "call func", ""()
1398+```
1399+
1400+If the inline asm unwinds the stack and isn’t marked with the ‘`unwind`’ keyword, the behavior is undefined.
1401+
1402+If multiple keywords appear, the ‘`sideeffect`’ keyword must come first, the ‘`alignstack`’ keyword second, the ‘`inteldialect`’ keyword third, and the ‘`unwind`’ keyword last.
1403+
1404+#### [Inline Asm Constraint String](https://llvm.org/docs/LangRef.html#id2049)[¶](https://llvm.org/docs/LangRef.html#inline-asm-constraint-string "Link to this heading")
1405+
1406+The constraint list is a comma-separated string, each element containing one or more constraint codes.
1407+
1408+For each element in the constraint list an appropriate register or memory operand will be chosen, and it will be made available to assembly template string expansion as `$0` for the first constraint in the list, `$1` for the second, etc.
1409+
1410+There are three different types of constraints, which are distinguished by a prefix symbol in front of the constraint code: Output, Input, and Clobber. The constraints must always be given in that order: outputs first, then inputs, then clobbers. They cannot be intermingled.
1411+
1412+There are also three different categories of constraint codes:
1413+
1414+- Register constraint. This is either a register class, or a fixed physical register. This kind of constraint will allocate a register, and if necessary, bitcast the argument or result to the appropriate type.
1415+
1416+- Memory constraint. This kind of constraint is for use with an instruction taking a memory operand. Different constraints allow for different addressing modes used by the target.
1417+
1418+- Immediate value constraint. This kind of constraint is for an integer or other immediate value which can be rendered directly into an instruction. The various target-specific constraints allow the selection of a value in the proper range for the instruction you wish to use it with.
1419+
1420+
1421+##### Output constraints[¶](https://llvm.org/docs/LangRef.html#output-constraints "Link to this heading")
1422+
1423+Output constraints are specified by an “`=`” prefix (e.g. “`=r`”). This indicates that the assembly will write to this operand, and the operand will then be made available as a return value of the `asm` expression. Output constraints do not consume an argument from the call instruction. (Except, see below about indirect outputs).
1424+
1425+Normally, it is expected that no output locations are written to by the assembly expression until _all_ of the inputs have been read. As such, LLVM may assign the same register to an output and an input. If this is not safe (e.g. if the assembly contains two instructions, where the first writes to one output, and the second reads an input and writes to a second output), then the “`&`” modifier must be used (e.g. “`=&r`”) to specify that the output is an “early-clobber” output. Marking an output as “early-clobber” ensures that LLVM will not use the same register for any inputs (other than an input tied to this output).
1426+
1427+##### Input constraints[¶](https://llvm.org/docs/LangRef.html#input-constraints "Link to this heading")
1428+
1429+Input constraints do not have a prefix – just the constraint codes. Each input constraint will consume one argument from the call instruction. It is not permitted for the asm to write to any input register or memory location (unless that input is tied to an output). Note also that multiple inputs may all be assigned to the same register, if LLVM can determine that they necessarily all contain the same value.
1430+
1431+Instead of providing a Constraint Code, input constraints may also “tie” themselves to an output constraint, by providing an integer as the constraint string. Tied inputs still consume an argument from the call instruction, and take up a position in the asm template numbering as is usual – they will simply be constrained to always use the same register as the output they’ve been tied to. For example, a constraint string of “`=r,0`” says to assign a register for output, and use that register as an input as well (it being the 0’th constraint).
1432+
1433+It is permitted to tie an input to an “early-clobber” output. In that case, no _other_ input may share the same register as the input tied to the early-clobber (even when the other input has the same value).
1434+
1435+You may only tie an input to an output which has a register constraint, not a memory constraint. Only a single input may be tied to an output.
1436+
1437+There is also an “interesting” feature which deserves a bit of explanation: if a register class constraint allocates a register which is too small for the value type operand provided as input, the input value will be split into multiple registers, and all of them passed to the inline asm.
1438+
1439+However, this feature is often not as useful as you might think.
1440+
1441+Firstly, the registers are _not_ guaranteed to be consecutive. So, on those architectures that have instructions which operate on multiple consecutive registers, this is not an appropriate way to support them. (e.g. the 32-bit SparcV8 has a 64-bit load instruction which takes a single 32-bit register. The hardware then loads into both the named register and the next register. This feature of inline asm would not be useful to support that.)
1442+
1443+A few of the targets provide a template string modifier allowing explicit access to the second register of a two-register operand (e.g. MIPS `L`, `M`, and `D`). On such an architecture, you can actually access the second allocated register (yet, still, not any subsequent ones). But, in that case, you’re still probably better off simply splitting the value into two separate operands, for clarity. (e.g. see the description of the `A` constraint on X86, which, despite existing only for use with this feature, is not really a good idea to use)
1444+
1445+##### Indirect inputs and outputs[¶](https://llvm.org/docs/LangRef.html#indirect-inputs-and-outputs "Link to this heading")
1446+
1447+Indirect output or input constraints can be specified by the “`*`” modifier (which goes after the “`=`” in case of an output). This indicates that the asm will write to or read from the contents of an _address_ provided as an input argument. (Note that in this way, indirect outputs act more like an _input_ than an output: just like an input, they consume an argument of the call expression, rather than producing a return value. An indirect output constraint is an “output” only in that the asm is expected to write to the contents of the input memory location, instead of just read from it).
1448+
1449+This is most typically used for a memory constraint, e.g. “`=*m`”, to pass the address of a variable as a value.
1450+
1451+It is also possible to use an indirect _register_ constraint, but only on output (e.g. “`=*r`”). This will cause LLVM to allocate a register for an output value normally, and then, separately emit a store to the address provided as input, after the provided inline asm. (It’s not clear what value this functionality provides, compared to writing the store explicitly after the asm statement, and it can only produce worse code, since it bypasses many optimization passes. I would recommend not using it.)
1452+
1453+Call arguments for indirect constraints must have pointer type and must specify the [elementtype](https://llvm.org/docs/LangRef.html#attr-elementtype) attribute to indicate the pointer element type.
1454+
1455+##### Clobber constraints[¶](https://llvm.org/docs/LangRef.html#clobber-constraints "Link to this heading")
1456+
1457+A clobber constraint is indicated by a “`~`” prefix. A clobber does not consume an input operand, nor generate an output. Clobbers cannot use any of the general constraint code letters – they may use only explicit register constraints, e.g. “`~{eax}`”. The one exception is that a clobber string of “`~{memory}`” indicates that the assembly writes to arbitrary undeclared memory locations – not only the memory pointed to by a declared indirect output.
1458+
1459+Note that clobbering named registers that are also present in output constraints is not legal.
1460+
1461+##### Label constraints[¶](https://llvm.org/docs/LangRef.html#label-constraints "Link to this heading")
1462+
1463+A label constraint is indicated by a “`!`” prefix and typically used in the form `"!i"`. Instead of consuming call arguments, label constraints consume indirect destination labels of `callbr` instructions.
1464+
1465+Label constraints can only be used in conjunction with `callbr` and the number of label constraints must match the number of indirect destination labels in the `callbr` instruction.
1466+
1467+##### Constraint Codes[¶](https://llvm.org/docs/LangRef.html#constraint-codes "Link to this heading")
1468+
1469+After a potential prefix comes the constraint code, or codes.
1470+
1471+A Constraint Code is either a single letter (e.g. “`r`”), a “`^`” character followed by two letters (e.g. “`^wc`”), or “`{`” register-name “`}`” (e.g. “`{eax}`”).
1472+
1473+The one and two letter constraint codes are typically chosen to be the same as GCC’s constraint codes.
1474+
1475+A single constraint may include one or more constraint codes in it, leaving it up to LLVM to choose which one to use. This is included mainly for compatibility with the translation of GCC inline asm coming from clang.
1476+
1477+There are two ways to specify alternatives, and either or both may be used in an inline asm constraint list:
1478+
1479+1. Append the codes to each other, making a constraint code set. E.g. “`im`” or “`{eax}m`”. This means “choose any of the options in the set”. The choice of constraint is made independently for each constraint in the constraint list.
1480+
1481+2. Use “`|`” between constraint code sets, creating alternatives. Every constraint in the constraint list must have the same number of alternative sets. With this syntax, the same alternative in _all_ of the items in the constraint list will be chosen together.
1482+
1483+
1484+Putting those together, you might have a two operand constraint string like `"rm|r,ri|rm"`. This indicates that if operand 0 is `r` or `m`, then operand 1 may be one of `r` or `i`. If operand 0 is `r`, then operand 1 may be one of `r` or `m`. But, operand 0 and 1 cannot both be of type m.
1485+
1486+However, the use of either of the alternatives features is _NOT_ recommended, as LLVM is not able to make an intelligent choice about which one to use. (At the point it currently needs to choose, not enough information is available to do so in a smart way.) Thus, it simply tries to make a choice that’s most likely to compile, not one that will be optimal performance. (e.g., given “`rm`”, it’ll always choose to use memory, not registers). And, if given multiple registers, or multiple register classes, it will simply choose the first one. (In fact, it doesn’t currently even ensure explicitly specified physical registers are unique, so specifying multiple physical registers as alternatives, like `{r11}{r12},{r11}{r12}`, will assign r11 to both operands, not at all what was intended.)
+830,
-0
1@@ -0,0 +1,830 @@
2+### Table of Contents
3+
4+1. [Basic Concepts](https://c9x.me/compile/doc/il.html#Basic-Concepts)
5+ - [Input Files](https://c9x.me/compile/doc/il.html#Input-Files)
6+ - [BNF Notation](https://c9x.me/compile/doc/il.html#BNF-Notation)
7+ - [Sigils](https://c9x.me/compile/doc/il.html#Sigils)
8+ - [Spacing](https://c9x.me/compile/doc/il.html#Spacing)
9+2. [Types](https://c9x.me/compile/doc/il.html#Types)
10+ - [Simple Types](https://c9x.me/compile/doc/il.html#Simple-Types)
11+ - [Subtyping](https://c9x.me/compile/doc/il.html#Subtyping)
12+3. [Constants and Vals](https://c9x.me/compile/doc/il.html#Constants-and-Vals)
13+4. [Linkage](https://c9x.me/compile/doc/il.html#Linkage)
14+5. [Definitions](https://c9x.me/compile/doc/il.html#Definitions)
15+ - [Aggregate Types](https://c9x.me/compile/doc/il.html#Aggregate-Types)
16+ - [Data](https://c9x.me/compile/doc/il.html#Data)
17+ - [Functions](https://c9x.me/compile/doc/il.html#Functions)
18+6. [Control](https://c9x.me/compile/doc/il.html#Control)
19+ - [Blocks](https://c9x.me/compile/doc/il.html#Blocks)
20+ - [Jumps](https://c9x.me/compile/doc/il.html#Jumps)
21+7. [Instructions](https://c9x.me/compile/doc/il.html#Instructions)
22+ - [Arithmetic and Bits](https://c9x.me/compile/doc/il.html#Arithmetic-and-Bits)
23+ - [Memory](https://c9x.me/compile/doc/il.html#Memory)
24+ - [Comparisons](https://c9x.me/compile/doc/il.html#Comparisons)
25+ - [Conversions](https://c9x.me/compile/doc/il.html#Conversions)
26+ - [Cast and Copy](https://c9x.me/compile/doc/il.html#Cast-and-Copy)
27+ - [Call](https://c9x.me/compile/doc/il.html#Call)
28+ - [Variadic](https://c9x.me/compile/doc/il.html#Variadic)
29+ - [Phi](https://c9x.me/compile/doc/il.html#Phi)
30+8. [Instructions Index](https://c9x.me/compile/doc/il.html#Instructions-Index)
31+
32+### 1\. Basic Concepts
33+
34+The intermediate language (IL) is a higher-level language than the machine's assembly language. It smooths most of the irregularities of the underlying hardware and allows an infinite number of temporaries to be used. This higher abstraction level lets frontend programmers focus on language design issues.
35+
36+#### Input Files
37+
38+The intermediate language is provided to QBE as text. Usually, one file is generated per each compilation unit from the frontend input language. An IL file is a sequence of [Definitions](https://c9x.me/compile/doc/il.html#Definitions) for data, functions, and types. Once processed by QBE, the resulting file can be assembled and linked using a standard toolchain (e.g., GNU binutils).
39+
40+Here is a complete "Hello World" IL file which defines a function that prints to the screen. Since the string is not a first class object (only the pointer is) it is defined outside the function's body. Comments start with a # character and finish with the end of the line.
41+
42+```
43+# Define the string constant.
44+data $str = { b "hello world", b 0 }
45+
46+export function w $main() {
47+@start
48+ # Call the puts function with $str as argument.
49+ %r =w call $puts(l $str)
50+ ret 0
51+}
52+```
53+
54+If you have read the LLVM language reference, you might recognize the example above. In comparison, QBE makes a much lighter use of types and the syntax is terser.
55+
56+#### BNF Notation
57+
58+The language syntax is vaporously described in the sections below using BNF syntax. The different BNF constructs used are listed below.
59+
60+- Keywords are enclosed between quotes;
61+- `... | ...` expresses alternatives;
62+- `( ... )` groups syntax;
63+- `[ ... ]` marks the nested syntax as optional;
64+- `( ... ),` designates a comma-separated list of the enclosed syntax;
65+- `...*` and `...+` are used for arbitrary and at-least-once repetition respectively.
66+
67+#### Sigils
68+
69+The intermediate language makes heavy use of sigils: all user-defined names are prefixed with a sigil. This is to avoid keyword conflicts, and also to quickly spot the scope and nature of identifiers.
70+
71+- `:` is for user-defined [Aggregate Types](https://c9x.me/compile/doc/il.html#Aggregate-Types)
72+- `$` is for globals (represented by a pointer)
73+- `%` is for function-scope temporaries
74+- `@` is for block labels
75+
76+In this BNF syntax, we use `?IDENT` to designate an identifier starting with the sigil `?`.
77+
78+#### Spacing
79+
80+```
81+NL := '\n'+
82+```
83+
84+Individual tokens in IL files must be separated by one or more spacing characters. Both spaces and tabs are recognized as spacing characters. In data and type definitions, newlines may also be used as spaces to prevent overly long lines. When exactly one of two consecutive tokens is a symbol (for example `,` or `=` or `{`), spacing may be omitted.
85+
86+### 2\. Types
87+
88+#### Simple Types
89+
90+```
91+BASETY := 'w' | 'l' | 's' | 'd' # Base types
92+EXTTY := BASETY | 'b' | 'h' # Extended types
93+```
94+
95+The IL makes minimal use of types. By design, the types used are restricted to what is necessary for unambiguous compilation to machine code and C interfacing. Unlike LLVM, QBE is not using types as a means to safety; they are only here for semantic purposes.
96+
97+The four base types are `w` (word), `l` (long), `s` (single), and `d` (double), they stand respectively for 32-bit and 64-bit integers, and 32-bit and 64-bit floating-point numbers. There are no pointer types available; pointers are typed by an integer type sufficiently wide to represent all memory addresses (e.g., `l` on 64-bit architectures). Temporaries in the IL can only have a base type.
98+
99+Extended types contain base types plus `b` (byte) and `h` (half word), respectively for 8-bit and 16-bit integers. They are used in [Aggregate Types](https://c9x.me/compile/doc/il.html#Aggregate-Types) and [Data](https://c9x.me/compile/doc/il.html#Data) definitions.
100+
101+For C interfacing, the IL also provides user-defined aggregate types as well as signed and unsigned variants of the sub-word extended types. Read more about these types in the [Aggregate Types](https://c9x.me/compile/doc/il.html#Aggregate-Types) and [Functions](https://c9x.me/compile/doc/il.html#Functions) sections.
102+
103+#### Subtyping
104+
105+The IL has a minimal subtyping feature, for integer types only. Any value of type `l` can be used in a `w` context. In that case, only the 32 least significant bits of the long value are used.
106+
107+Make note that it is the opposite of the usual subtyping on integers (in C, we can safely use an `int` where a `long` is expected). A word value cannot be used in long context. The rationale is that a word can be signed or unsigned, so extending it to a long could be done in two ways, either by zero-extension, or by sign-extension.
108+
109+### 3\. Constants and Vals
110+
111+```
112+CONST :=
113+ ['-'] NUMBER # Decimal integer
114+ | 's_' FP # Single-precision float
115+ | 'd_' FP # Double-precision float
116+ | $IDENT # Global symbol
117+
118+DYNCONST :=
119+ CONST
120+ | 'thread' $IDENT # Thread-local symbol
121+
122+VAL :=
123+ DYNCONST
124+ | %IDENT
125+```
126+
127+Constants come in two kinds: compile-time constants and dynamic constants. Dynamic constants include compile-time constants and other symbol variants that are only known at program-load time or execution time. Consequently, dynamic constants can only occur in function bodies.
128+
129+The representation of integers is two's complement. Floating-point numbers are represented using the single-precision and double-precision formats of the IEEE 754 standard.
130+
131+Constants specify a sequence of bits and are untyped. They are always parsed as 64-bit blobs. Depending on the context surrounding a constant, only some of its bits are used. For example, in the program below, the two variables defined have the same value since the first operand of the subtraction is a word (32-bit) context.
132+
133+```
134+%x =w sub -1, 0
135+%y =w sub 4294967295, 0
136+```
137+
138+Because specifying floating-point constants by their bits makes the code less readable, syntactic sugar is provided to express them. Standard scientific notation is prefixed with `s_` and `d_` for single and double precision numbers respectively. Once again, the following example defines twice the same double-precision constant.
139+
140+```
141+%x =d add d_0, d_-1
142+%y =d add d_0, -4616189618054758400
143+```
144+
145+Global symbols can also be used directly as constants; they will be resolved and turned into actual numeric constants by the linker.
146+
147+When the `thread` keyword prefixes a symbol name, the symbol's numeric value is resolved at runtime in the thread-local storage.
148+
149+Vals are used as arguments in regular, phi, and jump instructions within function definitions. They are either constants or function-scope temporaries.
150+
151+### 4\. Linkage
152+
153+```
154+LINKAGE :=
155+ 'export' [NL]
156+ | 'thread' [NL]
157+ | 'section' SECNAME [NL]
158+ | 'section' SECNAME SECFLAGS [NL]
159+
160+SECNAME := '"' .... '"'
161+SECFLAGS := '"' .... '"'
162+```
163+
164+Function and data definitions (see below) can specify linkage information to be passed to the assembler and eventually to the linker.
165+
166+The `export` linkage flag marks the defined item as visible outside the current file's scope. If absent, the symbol can only be referred to locally. Functions compiled by QBE and called from C need to be exported.
167+
168+The `thread` linkage flag can only qualify data definitions. It mandates that the object defined is stored in thread-local storage. Each time a runtime thread starts, the supporting platform runtime is in charge of making a new copy of the object for the fresh thread. Objects in thread-local storage must be accessed using the `thread $IDENT` syntax, as specified in the [Constants and Vals](https://c9x.me/compile/doc/il.html#Constants-and-Vals) section.
169+
170+A `section` flag can be specified to tell the linker to put the defined item in a certain section. The use of the section flag is platform dependent and we refer the user to the documentation of their assembler and linker for relevant information.
171+
172+```
173+section ".init_array"
174+data $.init.f = { l $f }
175+```
176+
177+The section flag can be used to add function pointers to a global initialization list, as depicted above. Note that some platforms provide a BSS section that can be used to minimize the footprint of uniformly zeroed data. When this section is available, QBE will automatically make use of it and no section flag is required.
178+
179+The section and export linkage flags should each appear at most once in a definition. If multiple occurrences are present, QBE is free to use any.
180+
181+### 5\. Definitions
182+
183+Definitions are the essential components of an IL file. They can define three types of objects: aggregate types, data, and functions. Aggregate types are never exported and do not compile to any code. Data and function definitions have file scope and are mutually recursive (even across IL files). Their visibility can be controlled using linkage flags.
184+
185+#### Aggregate Types
186+
187+```
188+TYPEDEF :=
189+ # Regular type
190+ 'type' :IDENT '=' ['align' NUMBER]
191+ '{'
192+ ( SUBTY [NUMBER] ),
193+ '}'
194+ | # Union type
195+ 'type' :IDENT '=' ['align' NUMBER]
196+ '{'
197+ (
198+ '{'
199+ ( SUBTY [NUMBER] ),
200+ '}'
201+ )+
202+ '}'
203+ | # Opaque type
204+ 'type' :IDENT '=' 'align' NUMBER '{' NUMBER '}'
205+
206+SUBTY := EXTTY | :IDENT
207+```
208+
209+Aggregate type definitions start with the `type` keyword. They have file scope, but types must be defined before being referenced. The inner structure of a type is expressed by a comma-separated list of types enclosed in curly braces.
210+
211+```
212+type :fourfloats = { s, s, d, d }
213+```
214+
215+For ease of IL generation, a trailing comma is tolerated by the parser. In case many items of the same type are sequenced (like in a C array), the shorter array syntax can be used.
216+
217+```
218+type :abyteandmanywords = { b, w 100 }
219+```
220+
221+By default, the alignment of an aggregate type is the maximum alignment of its members. The alignment can be explicitly specified by the programmer.
222+
223+```
224+type :cryptovector = align 16 { w 4 }
225+```
226+
227+Union types allow the same chunk of memory to be used with different layouts. They are defined by enclosing multiple regular aggregate type bodies in a pair of curly braces. Size and alignment of union types are set to the maximum size and alignment of each variation or, in the case of alignment, can be explicitly specified.
228+
229+```
230+type :un9 = { { b } { s } }
231+```
232+
233+Opaque types are used when the inner structure of an aggregate cannot be specified; the alignment for opaque types is mandatory. They are defined simply by enclosing their size between curly braces.
234+
235+```
236+type :opaque = align 16 { 32 }
237+```
238+
239+#### Data
240+
241+```
242+DATADEF :=
243+ LINKAGE*
244+ 'data' $IDENT '=' ['align' NUMBER]
245+ '{'
246+ ( EXTTY DATAITEM+
247+ | 'z' NUMBER ),
248+ '}'
249+
250+DATAITEM :=
251+ $IDENT ['+' NUMBER] # Symbol and offset
252+ | '"' ... '"' # String
253+ | CONST # Constant
254+```
255+
256+Data definitions express objects that will be emitted in the compiled file. Their visibility and location in the compiled artifact are controlled with linkage flags described in the [Linkage](https://c9x.me/compile/doc/il.html#Linkage) section.
257+
258+They define a global identifier (starting with the sigil `$`), that will contain a pointer to the object specified by the definition.
259+
260+Objects are described by a sequence of fields that start with a type letter. This letter can either be an extended type, or the `z` letter. If the letter used is an extended type, the data item following specifies the bits to be stored in the field. When several data items follow a letter, they initialize multiple fields of the same size.
261+
262+The members of a struct will be packed. This means that padding has to be emitted by the frontend when necessary. Alignment of the whole data objects can be manually specified, and when no alignment is provided, the maximum alignment from the platform is used.
263+
264+When the `z` letter is used the number following indicates the size of the field; the contents of the field are zero initialized. It can be used to add padding between fields or zero-initialize big arrays.
265+
266+Here are various examples of data definitions.
267+
268+```
269+# Three 32-bit values 1, 2, and 3
270+# followed by a 0 byte.
271+data $a = { w 1 2 3, b 0 }
272+
273+# A thousand bytes 0 initialized.
274+data $b = { z 1000 }
275+
276+# An object containing two 64-bit
277+# fields, one with all bits set and the
278+# other containing a pointer to the
279+# object itself.
280+data $c = { l -1, l $c }
281+```
282+
283+#### Functions
284+
285+```
286+FUNCDEF :=
287+ LINKAGE*
288+ 'function' [ABITY] $IDENT '(' (PARAM), ')' [NL]
289+ '{' NL
290+ BLOCK+
291+ '}'
292+
293+PARAM :=
294+ ABITY %IDENT # Regular parameter
295+ | 'env' %IDENT # Environment parameter (first)
296+ | '...' # Variadic marker (last)
297+
298+SUBWTY := 'sb' | 'ub' | 'sh' | 'uh' # Sub-word types
299+ABITY := BASETY | SUBWTY | :IDENT
300+```
301+
302+Function definitions contain the actual code to emit in the compiled file. They define a global symbol that contains a pointer to the function code. This pointer can be used in `call` instructions or stored in memory.
303+
304+The type given right before the function name is the return type of the function. All return values of this function must have this return type. If the return type is missing, the function must not return any value.
305+
306+The parameter list is a comma separated list of temporary names prefixed by types. The types are used to correctly implement C compatibility. When an argument has an aggregate type, a pointer to the aggregate is passed by the caller. In the example below, we have to use a load instruction to get the value of the first (and only) member of the struct.
307+
308+```
309+type :one = { w }
310+
311+function w $getone(:one %p) {
312+@start
313+ %val =w loadw %p
314+ ret %val
315+}
316+```
317+
318+If a function accepts or returns values that are smaller than a word, such as `signed char` or `unsigned short` in C, one of the sub-word types must be used. The sub-word types `sb`, `ub`, `sh`, and `uh` stand, respectively, for signed and unsigned 8-bit values, and signed and unsigned 16-bit values. Parameters associated with a sub-word type of bit width N only have their N least significant bits set and have base type `w`. For example, the function
319+
320+```
321+function w $addbyte(w %a, sb %b) {
322+@start
323+ %bw =w extsb %b
324+ %val =w add %a, %bw
325+ ret %val
326+}
327+```
328+
329+needs to sign-extend its second argument before the addition. Dually, return values with sub-word types do not need to be sign or zero extended.
330+
331+If the parameter list ends with `...`, the function is a variadic function: it can accept a variable number of arguments. To access the extra arguments provided by the caller, use the `vastart` and `vaarg` instructions described in the [Variadic](https://c9x.me/compile/doc/il.html#Variadic) section.
332+
333+Optionally, the parameter list can start with an environment parameter `env %e`. This special parameter is a 64-bit integer temporary (i.e., of type `l`). If the function does not use its environment parameter, callers can safely omit it. This parameter is invisible to a C caller: for example, the function
334+
335+```
336+export function w $add(env %e, w %a, w %b) {
337+@start
338+ %c =w add %a, %b
339+ ret %c
340+}
341+```
342+
343+must be given the C prototype `int add(int, int)`. The intended use of this feature is to pass the environment pointer of closures while retaining a very good compatibility with C. The [Call](https://c9x.me/compile/doc/il.html#Call) section explains how to pass an environment parameter.
344+
345+Since global symbols are defined mutually recursive, there is no need for function declarations: a function can be referenced before its definition. Similarly, functions from other modules can be used without previous declaration. All the type information necessary to compile a call is in the instruction itself.
346+
347+The syntax and semantics for the body of functions are described in the [Control](https://c9x.me/compile/doc/il.html#Control) section.
348+
349+### 6\. Control
350+
351+The IL represents programs as textual transcriptions of control flow graphs. The control flow is serialized as a sequence of blocks of straight-line code which are connected using jump instructions.
352+
353+#### Blocks
354+
355+```
356+BLOCK :=
357+ @IDENT NL # Block label
358+ ( PHI NL )* # Phi instructions
359+ ( INST NL )* # Regular instructions
360+ JUMP NL # Jump or return
361+```
362+
363+All blocks have a name that is specified by a label at their beginning. Then follows a sequence of instructions that have "fall-through" flow. Finally one jump terminates the block. The jump can either transfer control to another block of the same function or return; jumps are described further below.
364+
365+The first block in a function must not be the target of any jump in the program. If a jump to the function start is needed, the frontend must insert an empty prelude block at the beginning of the function.
366+
367+When one block jumps to the next block in the IL file, it is not necessary to write the jump instruction, it will be automatically added by the parser. For example the start block in the example below jumps directly to the loop block.
368+
369+```
370+function $loop() {
371+@start
372+@loop
373+ %x =w phi @start 100, @loop %x1
374+ %x1 =w sub %x, 1
375+ jnz %x1, @loop, @end
376+@end
377+ ret
378+}
379+```
380+
381+#### Jumps
382+
383+```
384+JUMP :=
385+ 'jmp' @IDENT # Unconditional
386+ | 'jnz' VAL, @IDENT, @IDENT # Conditional
387+ | 'ret' [VAL] # Return
388+ | 'hlt' # Termination
389+```
390+
391+A jump instruction ends every block and transfers the control to another program location. The target of a jump must never be the first block in a function. The four kinds of jumps available are described in the following list.
392+
393+1. Unconditional jump.
394+
395+ Simply jumps to another block of the same function.
396+
397+2. Conditional jump.
398+
399+ When its word argument is non-zero, it jumps to its first label argument; otherwise it jumps to the other label. The argument must be of word type; because of subtyping a long argument can be passed, but only its least significant 32 bits will be compared to 0.
400+
401+3. Function return.
402+
403+ Terminates the execution of the current function, optionally returning a value to the caller. The value returned must be of the type given in the function prototype. If the function prototype does not specify a return type, no return value can be used.
404+
405+4. Program termination.
406+
407+ Terminates the execution of the program with a target-dependent error. This instruction can be used when it is expected that the execution never reaches the end of the block it closes; for example, after having called a function such as `exit()`.
408+
409+
410+### 7\. Instructions
411+
412+Instructions are the smallest piece of code in the IL, they form the body of [Blocks](https://c9x.me/compile/doc/il.html#Blocks). The IL uses a three-address code, which means that one instruction computes an operation between two operands and assigns the result to a third one.
413+
414+An instruction has both a name and a return type, this return type is a base type that defines the size of the instruction's result. The type of the arguments can be unambiguously inferred using the instruction name and the return type. For example, for all arithmetic instructions, the type of the arguments is the same as the return type. The two additions below are valid if `%y` is a word or a long (because of [Subtyping](https://c9x.me/compile/doc/il.html#Subtyping)).
415+
416+```
417+%x =w add 0, %y
418+%z =w add %x, %x
419+```
420+
421+Some instructions, like comparisons and memory loads have operand types that differ from their return types. For instance, two floating points can be compared to give a word result (1 if the comparison succeeds, 0 if it fails).
422+
423+```
424+%c =w cgts %a, %b
425+```
426+
427+In the example above, both operands have to have single type. This is made explicit by the instruction suffix.
428+
429+The types of instructions are described below using a short type string. A type string specifies all the valid return types an instruction can have, its arity, and the type of its arguments depending on its return type.
430+
431+Type strings begin with acceptable return types, then follows, in parentheses, the possible types for the arguments. If the N-th return type of the type string is used for an instruction, the arguments must use the N-th type listed for them in the type string. When an instruction does not have a return type, the type string only contains the types of the arguments.
432+
433+The following abbreviations are used.
434+
435+- `T` stands for `wlsd`
436+- `I` stands for `wl`
437+- `F` stands for `sd`
438+- `m` stands for the type of pointers on the target; on 64-bit architectures it is the same as `l`
439+
440+For example, consider the type string `wl(F)`, it mentions that the instruction has only one argument and that if the return type used is long, the argument must be of type double.
441+
442+#### Arithmetic and Bits
443+
444+- `add`, `sub`, `div`, `mul` -- `T(T,T)`
445+- `neg` -- `T(T)`
446+- `udiv`, `rem`, `urem` -- `I(I,I)`
447+- `or`, `xor`, `and` -- `I(I,I)`
448+- `sar`, `shr`, `shl` -- `I(I,ww)`
449+
450+The base arithmetic instructions in the first bullet are available for all types, integers and floating points.
451+
452+When `div` is used with word or long return type, the arguments are treated as signed. The unsigned integral division is available as `udiv` instruction. When the result of a division is not an integer, it is truncated towards zero.
453+
454+The signed and unsigned remainder operations are available as `rem` and `urem`. The sign of the remainder is the same as the one of the dividend. Its magnitude is smaller than that of the divisor. These two instructions and `udiv` are only available with integer arguments and result.
455+
456+Bitwise OR, AND, and XOR operations are available for both integer types. Logical operations of typical programming languages can be implemented using [Comparisons](https://c9x.me/compile/doc/il.html#Comparisons) and [Jumps](https://c9x.me/compile/doc/il.html#Jumps).
457+
458+Shift instructions `sar`, `shr`, and `shl`, shift right or left their first operand by the amount from the second operand. The shifting amount is taken modulo the size of the result type. Shifting right can either preserve the sign of the value (using `sar`), or fill the newly freed bits with zeroes (using `shr`). Shifting left always fills the freed bits with zeroes.
459+
460+Remark that an arithmetic shift right (`sar`) is only equivalent to a division by a power of two for non-negative numbers. This is because the shift right "truncates" towards minus infinity, while the division truncates towards zero.
461+
462+#### Memory
463+
464+- Store instructions.
465+
466+ - `stored` -- `(d,m)`
467+ - `stores` -- `(s,m)`
468+ - `storel` -- `(l,m)`
469+ - `storew` -- `(w,m)`
470+ - `storeh` -- `(w,m)`
471+ - `storeb` -- `(w,m)`
472+
473+ Store instructions exist to store a value of any base type and any extended type. Since halfwords and bytes are not first class in the IL, `storeh` and `storeb` take a word as argument. Only the first 16 or 8 bits of this word will be stored in memory at the address specified in the second argument.
474+
475+- Load instructions.
476+
477+ - `loadd` -- `d(m)`
478+ - `loads` -- `s(m)`
479+ - `loadl` -- `l(m)`
480+ - `loadsw`, `loaduw` -- `I(mm)`
481+ - `loadsh`, `loaduh` -- `I(mm)`
482+ - `loadsb`, `loadub` -- `I(mm)`
483+
484+ For types smaller than long, two variants of the load instruction are available: one will sign extend the loaded value, while the other will zero extend it. Note that all loads smaller than long can load to either a long or a word.
485+
486+ The two instructions `loadsw` and `loaduw` have the same effect when they are used to define a word temporary. A `loadw` instruction is provided as syntactic sugar for `loadsw` to make explicit that the extension mechanism used is irrelevant.
487+
488+- Blits.
489+
490+ - `blit` -- `(m,m,w)`
491+
492+ The blit instruction copies in-memory data from its first address argument to its second address argument. The third argument is the number of bytes to copy. The source and destination spans are required to be either non-overlapping, or fully overlapping (source address identical to the destination address). The byte count argument must be a nonnegative numeric constant; it cannot be a temporary.
493+
494+ One blit instruction may generate a number of instructions proportional to its byte count argument, consequently, it is recommended to keep this argument relatively small. If large copies are necessary, it is preferable that frontends generate calls to a supporting `memcpy` function.
495+
496+- Stack allocation.
497+
498+ - `alloc4` -- `m(l)`
499+ - `alloc8` -- `m(l)`
500+ - `alloc16` -- `m(l)`
501+
502+ These instructions allocate a chunk of memory on the stack. The number ending the instruction name is the alignment required for the allocated slot. QBE will make sure that the returned address is a multiple of that alignment value.
503+
504+ Stack allocation instructions are used, for example, when compiling the C local variables, because their address can be taken. When compiling Fortran, temporaries can be used directly instead, because it is illegal to take the address of a variable.
505+
506+
507+The following example makes use of some of the memory instructions. Pointers are stored in long temporaries.
508+
509+```
510+%A0 =l alloc4 8 # stack allocate an array A of 2 words
511+%A1 =l add %A0, 4
512+storew 43, %A0 # A[0] <- 43
513+storew 255, %A1 # A[1] <- 255
514+%v1 =w loadw %A0 # %v1 <- A[0] as word
515+%v2 =w loadsb %A1 # %v2 <- A[1] as signed byte
516+%v3 =w add %v1, %v2 # %v3 is 42 here
517+```
518+
519+#### Comparisons
520+
521+Comparison instructions return an integer value (either a word or a long), and compare values of arbitrary types. The returned value is 1 if the two operands satisfy the comparison relation, or 0 otherwise. The names of comparisons respect a standard naming scheme in three parts.
522+
523+1. All comparisons start with the letter `c`.
524+2. Then comes a comparison type. The following types are available for integer comparisons:
525+
526+ - `eq` for equality
527+ - `ne` for inequality
528+ - `sle` for signed lower or equal
529+ - `slt` for signed lower
530+ - `sge` for signed greater or equal
531+ - `sgt` for signed greater
532+ - `ule` for unsigned lower or equal
533+ - `ult` for unsigned lower
534+ - `uge` for unsigned greater or equal
535+ - `ugt` for unsigned greater
536+
537+ Floating point comparisons use one of these types:
538+
539+ - `eq` for equality
540+ - `ne` for inequality
541+ - `le` for lower or equal
542+ - `lt` for lower
543+ - `ge` for greater or equal
544+ - `gt` for greater
545+ - `o` for ordered (no operand is a NaN)
546+ - `uo` for unordered (at least one operand is a NaN)
547+
548+ Because floating point types always have a sign bit, all the comparisons available are signed.
549+
550+3. Finally, the instruction name is terminated with a basic type suffix specifying the type of the operands to be compared.
551+
552+For example, `cod` (`I(dd,dd)`) compares two double-precision floating point numbers and returns 1 if the two floating points are not NaNs, or 0 otherwise. The `csltw` (`I(ww,ww)`) instruction compares two words representing signed numbers and returns 1 when the first argument is smaller than the second one.
553+
554+#### Conversions
555+
556+Conversion operations change the representation of a value, possibly modifying it if the target type cannot hold the value of the source type. Conversions can extend the precision of a temporary (e.g., from signed 8-bit to 32-bit), or convert a floating point into an integer and vice versa.
557+
558+- `extsw`, `extuw` -- `l(w)`
559+- `extsh`, `extuh` -- `I(ww)`
560+- `extsb`, `extub` -- `I(ww)`
561+- `exts` -- `d(s)`
562+- `truncd` -- `s(d)`
563+- `stosi` -- `I(ss)`
564+- `stoui` -- `I(ss)`
565+- `dtosi` -- `I(dd)`
566+- `dtoui` -- `I(dd)`
567+- `swtof` -- `F(ww)`
568+- `uwtof` -- `F(ww)`
569+- `sltof` -- `F(ll)`
570+- `ultof` -- `F(ll)`
571+
572+Extending the precision of a temporary is done using the `ext` family of instructions. Because QBE types do not specify the signedness (like in LLVM), extension instructions exist to sign-extend and zero-extend a value. For example, `extsb` takes a word argument and sign-extends the 8 least-significant bits to a full word or long, depending on the return type.
573+
574+The instructions `exts` (extend single) and `truncd` (truncate double) are provided to change the precision of a floating point value. When the double argument of `truncd` cannot be represented as a single-precision floating point, it is truncated towards zero.
575+
576+Converting between integers and floating points is done using `stosi` (single to signed integer), `stoui` (single to unsigned integer), `dtosi` (double to signed integer), `dtoui` (double to unsigned integer), `swtof` (signed word to float), `uwtof` (unsigned word to float), `sltof` (signed long to float) and `ultof` (unsigned long to float).
577+
578+Because of [Subtyping](https://c9x.me/compile/doc/il.html#Subtyping), there is no need to have an instruction to lower the precision of an integer temporary.
579+
580+#### Cast and Copy
581+
582+The `cast` and `copy` instructions return the bits of their argument verbatim. However a `cast` will change an integer into a floating point of the same width and vice versa.
583+
584+- `cast` -- `wlsd(sdwl)`
585+- `copy` -- `T(T)`
586+
587+Casts can be used to make bitwise operations on the representation of floating point numbers. For example the following program will compute the opposite of the single-precision floating point number `%f` into `%rs`.
588+
589+```
590+%b0 =w cast %f
591+%b1 =w xor 2147483648, %b0 # flip the msb
592+%rs =s cast %b1
593+```
594+
595+#### Call
596+
597+```
598+CALL := [%IDENT '=' ABITY] 'call' VAL '(' (ARG), ')'
599+
600+ARG :=
601+ ABITY VAL # Regular argument
602+ | 'env' VAL # Environment argument (first)
603+ | '...' # Variadic marker
604+
605+SUBWTY := 'sb' | 'ub' | 'sh' | 'uh' # Sub-word types
606+ABITY := BASETY | SUBWTY | :IDENT
607+```
608+
609+The call instruction is special in several ways. It is not a three-address instruction and requires the type of all its arguments to be given. Also, the return type can be either a base type or an aggregate type. These specifics are required to compile calls with C compatibility (i.e., to respect the ABI).
610+
611+When an aggregate type is used as argument type or return type, the value respectively passed or returned needs to be a pointer to a memory location holding the value. This is because aggregate types are not first-class citizens of the IL.
612+
613+Sub-word types are used for arguments and return values of width less than a word. Details on these types are presented in the [Functions](https://c9x.me/compile/doc/il.html#Functions) section. Arguments with sub-word types need not be sign or zero extended according to their type. Calls with a sub-word return type define a temporary of base type `w` with its most significant bits unspecified.
614+
615+Unless the called function does not return a value, a return temporary must be specified, even if it is never used afterwards.
616+
617+An environment parameter can be passed as first argument using the `env` keyword. The passed value must be a 64-bit integer. If the called function does not expect an environment parameter, it will be safely discarded. See the [Functions](https://c9x.me/compile/doc/il.html#Functions) section for more information about environment parameters.
618+
619+When the called function is variadic, there must be a `...` marker separating the named and variadic arguments.
620+
621+#### Variadic
622+
623+The `vastart` and `vaarg` instructions provide a portable way to access the extra parameters of a variadic function.
624+
625+- `vastart` -- `(m)`
626+- `vaarg` -- `T(mmmm)`
627+
628+The `vastart` instruction initializes a _variable argument list_ used to access the extra parameters of the enclosing variadic function. It is safe to call it multiple times.
629+
630+The `vaarg` instruction fetches the next argument from a variable argument list. It is currently limited to fetching arguments that have a base type. This instruction is essentially effectful: calling it twice in a row will return two consecutive arguments from the argument list.
631+
632+Both instructions take a pointer to a variable argument list as sole argument. The size and alignment of variable argument lists depend on the target used. However, it is possible to conservatively use the maximum size and alignment required by all the targets.
633+
634+```
635+type :valist = align 8 { 24 } # For amd64_sysv
636+type :valist = align 8 { 32 } # For arm64
637+type :valist = align 8 { 8 } # For rv64
638+```
639+
640+The following example defines a variadic function adding its first three arguments.
641+
642+```
643+function s $add3(s %a, ...) {
644+@start
645+ %ap =l alloc8 32
646+ vastart %ap
647+ %r =s call $vadd(s %a, l %ap)
648+ ret %r
649+}
650+
651+function s $vadd(s %a, l %ap) {
652+@start
653+ %b =s vaarg %ap
654+ %c =s vaarg %ap
655+ %d =s add %a, %b
656+ %e =s add %d, %c
657+ ret %e
658+}
659+```
660+
661+#### Phi
662+
663+```
664+PHI := %IDENT '=' BASETY 'phi' ( @IDENT VAL ),
665+```
666+
667+First and foremost, phi instructions are NOT necessary when writing a frontend to QBE. One solution to avoid having to deal with SSA form is to use stack allocated variables for all source program variables and perform assignments and lookups using [Memory](https://c9x.me/compile/doc/il.html#Memory) operations. This is what LLVM users typically do.
668+
669+Another solution is to simply emit code that is not in SSA form! Contrary to LLVM, QBE is able to fixup programs not in SSA form without requiring the boilerplate of loading and storing in memory. For example, the following program will be correctly compiled by QBE.
670+
671+```
672+@start
673+ %x =w copy 100
674+ %s =w copy 0
675+@loop
676+ %s =w add %s, %x
677+ %x =w sub %x, 1
678+ jnz %x, @loop, @end
679+@end
680+ ret %s
681+```
682+
683+Now, if you want to know what phi instructions are and how to use them in QBE, you can read the following.
684+
685+Phi instructions are specific to SSA form. In SSA form, values can only be assigned once; without phi instructions, this requirement is too strong to represent many programs. For example, consider the following C program.
686+
687+```
688+int f(int x) {
689+ int y;
690+ if (x)
691+ y = 1;
692+ else
693+ y = 2;
694+ return y;
695+}
696+```
697+
698+The variable `y` is assigned twice; the solution to translate it into SSA form is to insert a phi instruction.
699+
700+```
701+@ifstmt
702+ jnz %x, @ift, @iff
703+@ift
704+ jmp @retstmt
705+@iff
706+ jmp @retstmt
707+@retstmt
708+ %y =w phi @ift 1, @iff 2
709+ ret %y
710+```
711+
712+Phi instructions return one of their arguments depending on where the control came from. In the example, `%y` is set to 1 if the `@ift` branch is taken, or it is set to 2 otherwise.
713+
714+An important remark about phi instructions is that QBE assumes that if a variable is defined by a phi it respects all the SSA invariants. So it is critical to not use phi instructions unless you know exactly what you are doing.
715+
716+### 8\. Instructions Index
717+
718+- [Arithmetic and Bits](https://c9x.me/compile/doc/il.html#Arithmetic-and-Bits):
719+
720+ - `add`
721+ - `and`
722+ - `div`
723+ - `mul`
724+ - `neg`
725+ - `or`
726+ - `rem`
727+ - `sar`
728+ - `shl`
729+ - `shr`
730+ - `sub`
731+ - `udiv`
732+ - `urem`
733+ - `xor`
734+- [Memory](https://c9x.me/compile/doc/il.html#Memory):
735+
736+ - `alloc16`
737+ - `alloc4`
738+ - `alloc8`
739+ - `blit`
740+ - `loadd`
741+ - `loadl`
742+ - `loads`
743+ - `loadsb`
744+ - `loadsh`
745+ - `loadsw`
746+ - `loadub`
747+ - `loaduh`
748+ - `loaduw`
749+ - `loadw`
750+ - `storeb`
751+ - `stored`
752+ - `storeh`
753+ - `storel`
754+ - `stores`
755+ - `storew`
756+- [Comparisons](https://c9x.me/compile/doc/il.html#Comparisons):
757+
758+ - `ceqd`
759+ - `ceql`
760+ - `ceqs`
761+ - `ceqw`
762+ - `cged`
763+ - `cges`
764+ - `cgtd`
765+ - `cgts`
766+ - `cled`
767+ - `cles`
768+ - `cltd`
769+ - `clts`
770+ - `cned`
771+ - `cnel`
772+ - `cnes`
773+ - `cnew`
774+ - `cod`
775+ - `cos`
776+ - `csgel`
777+ - `csgew`
778+ - `csgtl`
779+ - `csgtw`
780+ - `cslel`
781+ - `cslew`
782+ - `csltl`
783+ - `csltw`
784+ - `cugel`
785+ - `cugew`
786+ - `cugtl`
787+ - `cugtw`
788+ - `culel`
789+ - `culew`
790+ - `cultl`
791+ - `cultw`
792+ - `cuod`
793+ - `cuos`
794+- [Conversions](https://c9x.me/compile/doc/il.html#Conversions):
795+
796+ - `dtosi`
797+ - `dtoui`
798+ - `exts`
799+ - `extsb`
800+ - `extsh`
801+ - `extsw`
802+ - `extub`
803+ - `extuh`
804+ - `extuw`
805+ - `sltof`
806+ - `ultof`
807+ - `stosi`
808+ - `stoui`
809+ - `swtof`
810+ - `uwtof`
811+ - `truncd`
812+- [Cast and Copy](https://c9x.me/compile/doc/il.html#Cast-and-Copy):
813+
814+ - `cast`
815+ - `copy`
816+- [Call](https://c9x.me/compile/doc/il.html#Call):
817+
818+ - `call`
819+- [Variadic](https://c9x.me/compile/doc/il.html#Variadic):
820+
821+ - `vastart`
822+ - `vaarg`
823+- [Phi](https://c9x.me/compile/doc/il.html#Phi):
824+
825+ - `phi`
826+- [Jumps](https://c9x.me/compile/doc/il.html#Jumps):
827+
828+ - `hlt`
829+ - `jmp`
830+ - `jnz`
831+ - `ret`
+0,
-0
+0,
-0
+17,
-0
1@@ -0,0 +1,17 @@
2+# B extensions
3+
4+Here we document all the things that deviate from or extend the original description of the B programming language from [kbman](https://www.nokia.com/bell-labs/about/dennis-m-ritchie/kbman.html).
5+
6+## Top Level `extrn` declarations
7+
8+```c
9+main() {
10+ printf("Hello, World\n");
11+}
12+
13+extrn printf;
14+```
15+
16+`printf` is now visible to all the functions in the global scope.
17+
18+## TODO: document how character escaping works: hex, utf8
+0,
-0
+0,
-0
+0,
-0
+15,
-15
1@@ -1,36 +1,36 @@
2 {
3- "binary_path": "/tmp/gtest-3087621747/f6bfb35d69b7af1",
4+ "binary_path": "/tmp/gtest-574252330/f6bfb35d69b7af1",
5 "compile": {
6- "stdout": "----------------------\nTokenizing 1 source file(s) (Typed Pass: true)...\nParsing tokens into AST...\nConstant folding...\nType checking...\nQBE Codegen...\nCalling libqbe on our QBE IR...\nAssembling and linking to create '/tmp/gtest-3087621747/f6bfb35d69b7af1'...\n----------------------\nCompilation successful!\n",
7- "stderr": "gbc: info: no target specified, defaulting to host target 'amd64_sysv'\n",
8+ "stdout": "----------------------\nTokenizing 1 source file(s) (Typed Pass: true)...\nParsing tokens into AST...\nConstant folding...\nType checking...\nGenerating backend-agnostic IR...\nGenerating target code with 'qbe' backend...\nAssembling and linking to create '/tmp/gtest-574252330/f6bfb35d69b7af1'...\n----------------------\nCompilation successful!\n",
9+ "stderr": "gbc: info: no target specified, defaulting to host target 'amd64_sysv' for backend 'qbe'\ngbc: info: using backend 'qbe' with target 'amd64_sysv' (GOOS=linux, GOARCH=amd64)\n",
10 "exitCode": 0,
11- "duration": 58934128,
12+ "duration": 42724761,
13 "timed_out": false
14 },
15 "runs": [
16 {
17 "name": "fold",
18 "args": [
19- "09876543210123456789009887654321012345678900987654321098765432101234567890098876543210123456789009876543210\n"
20+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzAB\n"
21 ],
22 "result": {
23 "stdout": "--- Vector \u0026 Matrix Math Demo ---\nVector A: [3, 5]\nVector B: [-2, 4]\n\n--- Vector Operations ---\nA + B: [1, 9]\nA . B: 14\n\n--- Matrix Operations ---\nRotation Matrix R: [[0, 1], [-1, 0]]\nScaling Matrix T: [[2, 0], [0, 3]]\nR * A: [5, -3]\nT * R: [[0, 2], [-3, 0]]\n(T * R) * B: [8, 6]\n",
24 "stderr": "",
25 "exitCode": 0,
26- "duration": 480603,
27+ "duration": 349710,
28 "timed_out": false
29 }
30 },
31 {
32 "name": "fold2",
33 "args": [
34- "098765432101234567890098876543210123456789009876543210\n"
35+ "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWX\n"
36 ],
37 "result": {
38 "stdout": "--- Vector \u0026 Matrix Math Demo ---\nVector A: [3, 5]\nVector B: [-2, 4]\n\n--- Vector Operations ---\nA + B: [1, 9]\nA . B: 14\n\n--- Matrix Operations ---\nRotation Matrix R: [[0, 1], [-1, 0]]\nScaling Matrix T: [[2, 0], [0, 3]]\nR * A: [5, -3]\nT * R: [[0, 2], [-3, 0]]\n(T * R) * B: [8, 6]\n",
39 "stderr": "",
40 "exitCode": 0,
41- "duration": 439486,
42+ "duration": 322891,
43 "timed_out": false
44 }
45 },
46@@ -43,7 +43,7 @@
47 "stdout": "--- Vector \u0026 Matrix Math Demo ---\nVector A: [3, 5]\nVector B: [-2, 4]\n\n--- Vector Operations ---\nA + B: [1, 9]\nA . B: 14\n\n--- Matrix Operations ---\nRotation Matrix R: [[0, 1], [-1, 0]]\nScaling Matrix T: [[2, 0], [0, 3]]\nR * A: [5, -3]\nT * R: [[0, 2], [-3, 0]]\n(T * R) * B: [8, 6]\n",
48 "stderr": "",
49 "exitCode": 0,
50- "duration": 388710,
51+ "duration": 306735,
52 "timed_out": false
53 }
54 },
55@@ -53,7 +53,7 @@
56 "stdout": "--- Vector \u0026 Matrix Math Demo ---\nVector A: [3, 5]\nVector B: [-2, 4]\n\n--- Vector Operations ---\nA + B: [1, 9]\nA . B: 14\n\n--- Matrix Operations ---\nRotation Matrix R: [[0, 1], [-1, 0]]\nScaling Matrix T: [[2, 0], [0, 3]]\nR * A: [5, -3]\nT * R: [[0, 2], [-3, 0]]\n(T * R) * B: [8, 6]\n",
57 "stderr": "",
58 "exitCode": 0,
59- "duration": 345615,
60+ "duration": 321056,
61 "timed_out": false
62 }
63 },
64@@ -66,7 +66,7 @@
65 "stdout": "--- Vector \u0026 Matrix Math Demo ---\nVector A: [3, 5]\nVector B: [-2, 4]\n\n--- Vector Operations ---\nA + B: [1, 9]\nA . B: 14\n\n--- Matrix Operations ---\nRotation Matrix R: [[0, 1], [-1, 0]]\nScaling Matrix T: [[2, 0], [0, 3]]\nR * A: [5, -3]\nT * R: [[0, 2], [-3, 0]]\n(T * R) * B: [8, 6]\n",
66 "stderr": "",
67 "exitCode": 0,
68- "duration": 346734,
69+ "duration": 312027,
70 "timed_out": false
71 }
72 },
73@@ -79,7 +79,7 @@
74 "stdout": "--- Vector \u0026 Matrix Math Demo ---\nVector A: [3, 5]\nVector B: [-2, 4]\n\n--- Vector Operations ---\nA + B: [1, 9]\nA . B: 14\n\n--- Matrix Operations ---\nRotation Matrix R: [[0, 1], [-1, 0]]\nScaling Matrix T: [[2, 0], [0, 3]]\nR * A: [5, -3]\nT * R: [[0, 2], [-3, 0]]\n(T * R) * B: [8, 6]\n",
75 "stderr": "",
76 "exitCode": 0,
77- "duration": 340355,
78+ "duration": 309499,
79 "timed_out": false
80 }
81 },
82@@ -92,7 +92,7 @@
83 "stdout": "--- Vector \u0026 Matrix Math Demo ---\nVector A: [3, 5]\nVector B: [-2, 4]\n\n--- Vector Operations ---\nA + B: [1, 9]\nA . B: 14\n\n--- Matrix Operations ---\nRotation Matrix R: [[0, 1], [-1, 0]]\nScaling Matrix T: [[2, 0], [0, 3]]\nR * A: [5, -3]\nT * R: [[0, 2], [-3, 0]]\n(T * R) * B: [8, 6]\n",
84 "stderr": "",
85 "exitCode": 0,
86- "duration": 332373,
87+ "duration": 309075,
88 "timed_out": false
89 }
90 },
91@@ -105,7 +105,7 @@
92 "stdout": "--- Vector \u0026 Matrix Math Demo ---\nVector A: [3, 5]\nVector B: [-2, 4]\n\n--- Vector Operations ---\nA + B: [1, 9]\nA . B: 14\n\n--- Matrix Operations ---\nRotation Matrix R: [[0, 1], [-1, 0]]\nScaling Matrix T: [[2, 0], [0, 3]]\nR * A: [5, -3]\nT * R: [[0, 2], [-3, 0]]\n(T * R) * B: [8, 6]\n",
93 "stderr": "",
94 "exitCode": 0,
95- "duration": 346248,
96+ "duration": 320944,
97 "timed_out": false
98 }
99 },
100@@ -118,7 +118,7 @@
101 "stdout": "--- Vector \u0026 Matrix Math Demo ---\nVector A: [3, 5]\nVector B: [-2, 4]\n\n--- Vector Operations ---\nA + B: [1, 9]\nA . B: 14\n\n--- Matrix Operations ---\nRotation Matrix R: [[0, 1], [-1, 0]]\nScaling Matrix T: [[2, 0], [0, 3]]\nR * A: [5, -3]\nT * R: [[0, 2], [-3, 0]]\n(T * R) * B: [8, 6]\n",
102 "stderr": "",
103 "exitCode": 0,
104- "duration": 342933,
105+ "duration": 341927,
106 "timed_out": false
107 }
108 }
+80,
-0
1@@ -0,0 +1,80 @@
2+extrn printf;
3+
4+type struct tm {
5+ sec,
6+ min,
7+ hour,
8+ mday,
9+ mon,
10+ year,
11+ wday,
12+ yday,
13+ isdst int32; // localtime is extrn, it comes from the Musl libc
14+}; // and `tm` is the same struct as the one there
15+ // :3
16+
17+// Typed external function declarations
18+int extrn time;
19+tm* extrn localtime;
20+
21+int32 days2Month(m int, y int) {
22+ if (m == 1) { // feb
23+ if ((y % 400 == 0) | ((y % 4 == 0) & (y % 100 != 0))) {
24+ return (29);
25+ } else {
26+ return (28);
27+ }
28+ };
29+ if ((m == 0) | (m == 2) | (m == 4) | (m == 6) | (m == 7) | (m == 9) | (m == 11)) {
30+ return (31);
31+ };
32+ return (30);
33+}
34+
35+int main() {
36+ now := time(0);
37+ tptr := localtime(&now);
38+ tm := *tptr;
39+
40+ year := tm.year + 1900;
41+ month := tm.mon;
42+ today := tm.mday;
43+ wday := tm.wday;
44+
45+ days := days2Month(month, year);
46+
47+ printf(" %d/%d\n", month + 1, year);
48+ printf("Su Mo Tu We Th Fr Sa\n");
49+
50+ // Calculate the weekday of the first day of the month
51+ first_wday := (wday - (today - 1) % 7 + 7) % 7;
52+
53+ i := 0;
54+ while (i < first_wday) {
55+ printf(" ");
56+ i = i + 1;
57+ };
58+
59+ d := 1;
60+ while (d <= days) {
61+ if (d == today) {
62+ printf("\033[31m%2d\033[0m ", d);
63+ } else {
64+ printf("%2d ", d);
65+ };
66+
67+ current_wday := (first_wday + d - 1) % 7;
68+ if (current_wday == 6) { // It's Saturday, print \n
69+ printf("\n");
70+ };
71+ d = d + 1;
72+ };
73+
74+ // Month doesn't end in Saturday so add \n
75+ if ((first_wday + days - 1) % 7 != 6) {
76+ printf("\n");
77+ };
78+
79+ return (0);
80+}
81+
+78,
-0
1@@ -0,0 +1,78 @@
2+extrn printf;
3+
4+type struct tm {
5+ sec,
6+ min,
7+ hour,
8+ mday,
9+ mon,
10+ year,
11+ wday,
12+ yday,
13+ isdst int32; // localtime is extrn, it comes from the Musl libc
14+}; // and `tm` is the same struct as the one there
15+ // :3
16+
17+extrn time, localtime;
18+
19+int32 days2Month(m int, y int) {
20+ if (m == 1) { // feb
21+ if ((y % 400 == 0) | ((y % 4 == 0) & (y % 100 != 0))) {
22+ return (29);
23+ } else {
24+ return (28);
25+ }
26+ };
27+ if ((m == 0) | (m == 2) | (m == 4) | (m == 6) | (m == 7) | (m == 9) | (m == 11)) {
28+ return (31);
29+ };
30+ return (30);
31+}
32+
33+int main() {
34+ now := time(0);
35+ tptr := localtime(&now);
36+ tm := *tptr;
37+
38+ year := tm.year + 1900;
39+ month := tm.mon;
40+ today := tm.mday;
41+ wday := tm.wday;
42+
43+ days := days2Month(month, year);
44+
45+ printf(" %d/%d\n", month + 1, year);
46+ printf("Su Mo Tu We Th Fr Sa\n");
47+
48+ // Calculate the weekday of the first day of the month
49+ first_wday := (wday - (today - 1) % 7 + 7) % 7;
50+
51+ i := 0;
52+ while (i < first_wday) {
53+ printf(" ");
54+ i = i + 1;
55+ };
56+
57+ d := 1;
58+ while (d <= days) {
59+ if (d == today) {
60+ printf("\033[31m%2d\033[0m ", d);
61+ } else {
62+ printf("%2d ", d);
63+ };
64+
65+ current_wday := (first_wday + d - 1) % 7;
66+ if (current_wday == 6) { // It's Saturday, print \n
67+ printf("\n");
68+ };
69+ d = d + 1;
70+ };
71+
72+ // Month doesn't end in Saturday so add \n
73+ if ((first_wday + days - 1) % 7 != 6) {
74+ printf("\n");
75+ };
76+
77+ return (0);
78+}
79+
+77,
-0
1@@ -0,0 +1,77 @@
2+extrn printf, time, localtime;
3+
4+/*
5+ * On amd64_sysv, the C struct tm has the following layout:
6+ *
7+ * struct tm {
8+ * int tm_sec; // 0
9+ * int tm_min; // 4
10+ * int tm_hour; // 8
11+ * int tm_mday; // 12
12+ * int tm_mon; // 16
13+ * int tm_year; // 20
14+ * int tm_wday; // 24
15+ * int tm_yday; // 28
16+ * int tm_isdst; // 32
17+ * };
18+ *
19+ * All members are 4-byte integers.
20+ */
21+
22+int days_in_month(m int, y int) {
23+ if (m == 1) { // February
24+ if ((y % 400 == 0) | ((y % 4 == 0) & (y % 100 != 0))) {
25+ return (29);
26+ } else {
27+ return (28);
28+ };
29+ };
30+ if ((m == 0) | (m == 2) | (m == 4) | (m == 6) | (m == 7) | (m == 9) | (m == 11)) {
31+ return (31);
32+ };
33+ return (30);
34+}
35+
36+int main() {
37+ now := time(0);
38+ tptr := localtime(&now);
39+
40+ byte_ptr := (byte*)tptr;
41+ year := *((int*)(byte_ptr + 20)) + 1900;
42+ month := *((int*)(byte_ptr + 16)); // 0 = Jan
43+ today := *((int*)(byte_ptr + 12));
44+ wday := *((int*)(byte_ptr + 24));
45+
46+ days := days_in_month(month, year);
47+
48+ printf(" %d/%d\n", month + 1, year);
49+ printf("Su Mo Tu We Th Fr Sa\n");
50+
51+ // Calculate the weekday of the first day of the month.
52+ // wday is for today, so we go back (today - 1) days.
53+ // The result must be non-negative, so we add 7 before the modulo
54+ first_wday := (wday - (today - 1) % 7 + 7) % 7;
55+
56+ i := 0;
57+ while (i < first_wday) {
58+ printf(" ");
59+ i = i + 1;
60+ };
61+
62+ d := 1;
63+ while (d <= days) {
64+ if (d == today) {
65+ printf("\033[31m%2d\033[0m ", d);
66+ } else {
67+ printf("%2d ", d);
68+ };
69+
70+ current_wday := (first_wday + d - 1) % 7;
71+ if (current_wday == 6) { // It's Saturday, add \n
72+ printf("\n");
73+ };
74+ d = d + 1;
75+ };
76+ printf("\n");
77+ return (0);
78+}
M
go.mod
+1,
-2
1@@ -13,7 +13,6 @@ require (
2
3 require (
4 github.com/dustin/go-humanize v1.0.1 // indirect
5- github.com/goforj/godump v1.5.0 // indirect
6 github.com/google/uuid v1.6.0 // indirect
7 github.com/mattn/go-isatty v0.0.20 // indirect
8 github.com/ncruces/go-strftime v0.1.9 // indirect
9@@ -21,7 +20,7 @@ require (
10 golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b // indirect
11 golang.org/x/sys v0.35.0 // indirect
12 modernc.org/goabi0 v0.2.0 // indirect
13- modernc.org/libc v1.66.7 // indirect
14+ modernc.org/libc v1.66.8 // indirect
15 modernc.org/mathutil v1.7.1 // indirect
16 modernc.org/memory v1.11.0 // indirect
17 modernc.org/token v1.1.0 // indirect
M
go.sum
+10,
-36
1@@ -2,8 +2,6 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF
2 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
3 github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
4 github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
5-github.com/goforj/godump v1.5.0 h1:QALI7uJkpiwAW3Anko+vOFQzLlWvB7YviYCJpE1sFIE=
6-github.com/goforj/godump v1.5.0/go.mod h1:lCaXaxNTozTNAMJTPY91/ntMqw3JF8FOL93jCNKpNW0=
7 github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
8 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
9 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
10@@ -18,57 +16,33 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
11 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
12 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
13 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
14-golang.org/x/exp v0.0.0-20250718183923-645b1fa84792 h1:R9PFI6EUdfVKgwKjZef7QIwGcBKu86OEFpJ9nUEP2l4=
15-golang.org/x/exp v0.0.0-20250718183923-645b1fa84792/go.mod h1:A+z0yzpGtvnG90cToK5n2tu8UJVP2XUATh+r+sfOOOc=
16-golang.org/x/exp v0.0.0-20250813145105-42675adae3e6 h1:SbTAbRFnd5kjQXbczszQ0hdk3ctwYf3qBNH9jIsGclE=
17-golang.org/x/exp v0.0.0-20250813145105-42675adae3e6/go.mod h1:4QTo5u+SEIbbKW1RacMZq1YEfOBqeXa19JeshGi+zc4=
18 golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b h1:DXr+pvt3nC887026GRP39Ej11UATqWDmWuS99x26cD0=
19 golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b/go.mod h1:4QTo5u+SEIbbKW1RacMZq1YEfOBqeXa19JeshGi+zc4=
20-golang.org/x/mod v0.26.0 h1:EGMPT//Ezu+ylkCijjPc+f4Aih7sZvaAr+O3EHBxvZg=
21-golang.org/x/mod v0.26.0/go.mod h1:/j6NAhSk8iQ723BGAUyoAcn7SlD7s15Dp9Nd/SfeaFQ=
22 golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ=
23+golang.org/x/mod v0.27.0/go.mod h1:rWI627Fq0DEoudcK+MBkNkCe0EetEaDSwJJkCcjpazc=
24 golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
25 golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
26 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
27-golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=
28-golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
29 golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
30 golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
31 golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4=
32 golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw=
33-golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0=
34-golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw=
35 golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
36-modernc.org/cc/v4 v4.26.3 h1:yEN8dzrkRFnn4PUUKXLYIqVf2PJYAEjMTFjO3BDGc3I=
37-modernc.org/cc/v4 v4.26.3/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
38+golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
39 modernc.org/cc/v4 v4.26.4 h1:jPhG8oNjtTYuP2FA4YefTJ/wioNUGALmGuEWt7SUR6s=
40-modernc.org/ccgo/v4 v4.28.0 h1:rjznn6WWehKq7dG4JtLRKxb52Ecv8OUGah8+Z/SfpNU=
41-modernc.org/ccgo/v4 v4.28.0/go.mod h1:JygV3+9AV6SmPhDasu4JgquwU81XAKLd3OKTUDNOiKE=
42-modernc.org/fileutil v1.3.8 h1:qtzNm7ED75pd1C7WgAGcK4edm4fvhtBsEiI/0NQ54YM=
43-modernc.org/fileutil v1.3.8/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc=
44-modernc.org/fileutil v1.3.15 h1:rJAXTP6ilMW/1+kzDiqmBlHLWszheUFXIyGQIAvjJpY=
45-modernc.org/fileutil v1.3.20 h1:HxYM7QaeqszXhtIbmcao35huy9YTYRrRZuN4saQovG8=
46+modernc.org/cc/v4 v4.26.4/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
47+modernc.org/ccgo/v4 v4.28.1 h1:wPKYn5EC/mYTqBO373jKjvX2n+3+aK7+sICCv4Fjy1A=
48+modernc.org/ccgo/v4 v4.28.1/go.mod h1:uD+4RnfrVgE6ec9NGguUNdhqzNIeeomeXf6CL0GTE5Q=
49+modernc.org/fileutil v1.3.28 h1:Vp156KUA2nPu9F1NEv036x9UGOjg2qsi5QlWTjZmtMk=
50+modernc.org/fileutil v1.3.28/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc=
51 modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
52 modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
53-modernc.org/gc/v3 v3.1.0 h1:CiObI+9ROz7pjjH3iAgMPaFCN5zE3sN5KF4jet8BWdc=
54-modernc.org/gc/v3 v3.1.0/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
55 modernc.org/gc/v3 v3.1.1 h1:k8T3gkXWY9sEiytKhcgyiZ2L0DTyCQ/nvX+LoCljoRE=
56+modernc.org/gc/v3 v3.1.1/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
57 modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
58 modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
59-modernc.org/libc v1.66.6 h1:RyQpwAhM/19nXD8y3iejM/AjmKwY2TjxZTlUWTsWw2U=
60-modernc.org/libc v1.66.6/go.mod h1:j8z0EYAuumoMQ3+cWXtmw6m+LYn3qm8dcZDFtFTSq+M=
61-modernc.org/libc v1.66.7 h1:rjhZ8OSCybKWxS1CJr0hikpEi6Vg+944Ouyrd+bQsoY=
62-modernc.org/libc v1.66.7/go.mod h1:ln6tbWX0NH+mzApEoDRvilBvAWFt1HX7AUA4VDdVDPM=
63-modernc.org/libqbe v0.3.17 h1:2vnU1Y9ay4FFNtnieI1i+tm6uwQwIT+bpJDOqohQF0g=
64-modernc.org/libqbe v0.3.17/go.mod h1:7OLbdGw1qk5BrO3MpJidWbFAUH3RCDk1fI1RbEN98yY=
65-modernc.org/libqbe v0.3.18 h1:tDqVm12NvVJd9eYfYDTxSKDLqrvkonzFy4CwGdf46bI=
66-modernc.org/libqbe v0.3.18/go.mod h1:v9jfQ3pPqP0lloc3x9s/O0QyTrAyWl7nBRDc3CA1EKY=
67-modernc.org/libqbe v0.3.19 h1:u/JP8fjPYjg8Cbiu42lBNB+Q+x0q7kQKFZnDv7QI1C0=
68-modernc.org/libqbe v0.3.19/go.mod h1:v9jfQ3pPqP0lloc3x9s/O0QyTrAyWl7nBRDc3CA1EKY=
69-modernc.org/libqbe v0.3.20 h1:MQ7/yQ1YOww6iYUrYo+ffrm8v+7L0FR/ZTHtWJmJaQ8=
70-modernc.org/libqbe v0.3.20/go.mod h1:v9jfQ3pPqP0lloc3x9s/O0QyTrAyWl7nBRDc3CA1EKY=
71-modernc.org/libqbe v0.3.21 h1:qDlRpTO1aQ4gPUXZv/6SLblMq1nOajWsi4ibsPIaZVY=
72-modernc.org/libqbe v0.3.21/go.mod h1:v9jfQ3pPqP0lloc3x9s/O0QyTrAyWl7nBRDc3CA1EKY=
73+modernc.org/libc v1.66.8 h1:/awsvTnyN/sNjvJm6S3lb7KZw5WV4ly/sBEG7ZUzmIE=
74+modernc.org/libc v1.66.8/go.mod h1:aVdcY7udcawRqauu0HukYYxtBSizV+R80n/6aQe9D5k=
75 modernc.org/libqbe v0.3.23 h1:EDYelNyP3blv6lOTGsVwreNqeto3ZzgqAcP8dYSzBOA=
76 modernc.org/libqbe v0.3.23/go.mod h1:V1AfFp9d5BdPzws+QyZENCC+xl/LZ3//WFDithgZAhs=
77 modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
+2,
-2
1@@ -1,6 +1,6 @@
2 # libb - The Standard Library of B
3
4-Here we describe the bare minium of the functionality that libb must provide. It may provide more on some platforms if necessary.
5+Here we describe the bare minimum functionality that libb must provide. It may provide more on some platforms if necessary.
6
7 Some platforms like `gas-x86_64-linux`, `gas-aarch64-linux`, etc also link with libc, which means some of the functionality of libb is covered by libc. For platforms that do not link with libc (like `uxn`, `6502`, etc) the required functionality should be implemented from scratch.
8
9@@ -10,7 +10,7 @@ If you don't want to link with libb (and libc on the platforms where it's availa
10
11 Loosely based on `8.0 Library Functions` from [kbman][kbman]. May contain additional historically inaccurate things.
12
13-<!-- TODO: document the main(argc, argv) functionality that is provided by libb -->
14+Note: `main(argc, argv)` functionality is provided by libb but not documented yet.
15
16 | Signature | Description |
17 |------------------------------|------------------------------------------------------------------------------------------------------------------|
+131,
-46
1@@ -1,8 +1,10 @@
2 package ast
3
4 import (
5+ "fmt"
6 "github.com/xplshn/gbc/pkg/token"
7 "github.com/xplshn/gbc/pkg/util"
8+ "strings"
9 )
10
11 type NodeType int
12@@ -14,6 +16,7 @@ const (
13 Ident
14 Nil
15 Assign
16+ MultiAssign
17 BinaryOp
18 UnaryOp
19 PostfixOp
20@@ -25,7 +28,9 @@ const (
21 AutoAlloc
22 MemberAccess
23 TypeCast
24+ TypeOf
25 StructLiteral
26+ ArrayLiteral
27 FuncDecl
28 VarDecl
29 MultiVarDecl
30@@ -68,18 +73,18 @@ const (
31 TYPE_FLOAT
32 TYPE_UNTYPED
33 TYPE_NIL
34- TYPE_UNTYPED_INT
35- TYPE_UNTYPED_FLOAT
36+ TYPE_LITERAL_INT
37+ TYPE_LITERAL_FLOAT
38 )
39
40 type BxType struct {
41- Kind BxTypeKind
42- Base *BxType
43- Name string
44- ArraySize *Node
45- IsConst bool
46- StructTag string
47- Fields []*Node
48+ Kind BxTypeKind
49+ Base *BxType
50+ Name string
51+ ArraySize *Node
52+ IsConst bool
53+ StructTag string
54+ Fields []*Node
55 EnumMembers []*Node
56 }
57
58@@ -103,8 +108,8 @@ var (
59 TypeUntyped = &BxType{Kind: TYPE_UNTYPED, Name: "untyped"}
60 TypeString = &BxType{Kind: TYPE_POINTER, Base: TypeByte, Name: "string"}
61 TypeNil = &BxType{Kind: TYPE_NIL, Name: "nil"}
62- TypeUntypedInt = &BxType{Kind: TYPE_UNTYPED_INT, Name: "untyped int"}
63- TypeUntypedFloat = &BxType{Kind: TYPE_UNTYPED_FLOAT, Name: "untyped float"}
64+ TypeLiteralInt = &BxType{Kind: TYPE_LITERAL_INT, Name: "int"}
65+ TypeLiteralFloat = &BxType{Kind: TYPE_LITERAL_FLOAT, Name: "float"}
66 )
67
68 type NumberNode struct{ Value int64 }
69@@ -112,18 +117,25 @@ type FloatNumberNode struct{ Value float64 }
70 type StringNode struct{ Value string }
71 type NilNode struct{}
72 type IdentNode struct{ Name string }
73-type AssignNode struct{ Op token.Type; Lhs, Rhs *Node }
74-type BinaryOpNode struct{ Op token.Type; Left, Right *Node }
75-type UnaryOpNode struct{ Op token.Type; Expr *Node }
76-type PostfixOpNode struct{ Op token.Type; Expr *Node }
77+type AssignNode struct { Op token.Type; Lhs, Rhs *Node }
78+type MultiAssignNode struct { Op token.Type; Lhs, Rhs []*Node }
79+type BinaryOpNode struct { Op token.Type; Left, Right *Node }
80+type UnaryOpNode struct { Op token.Type; Expr *Node }
81+type PostfixOpNode struct { Op token.Type; Expr *Node }
82 type IndirectionNode struct{ Expr *Node }
83 type AddressOfNode struct{ LValue *Node }
84 type TernaryNode struct{ Cond, ThenExpr, ElseExpr *Node }
85 type SubscriptNode struct{ Array, Index *Node }
86 type MemberAccessNode struct{ Expr, Member *Node }
87-type TypeCastNode struct{ Expr *Node; TargetType *BxType }
88-type StructLiteralNode struct{ TypeNode *Node; Values []*Node; Names []*Node }
89-type FuncCallNode struct{ FuncExpr *Node; Args []*Node }
90+type TypeCastNode struct { Expr *Node; TargetType *BxType }
91+type TypeOfNode struct{ Expr *Node }
92+type StructLiteralNode struct {
93+ TypeNode *Node
94+ Values []*Node
95+ Names []*Node
96+}
97+type ArrayLiteralNode struct { ElementType *BxType; Values []*Node }
98+type FuncCallNode struct { FuncExpr *Node; Args []*Node }
99 type AutoAllocNode struct{ Size *Node }
100 type FuncDeclNode struct {
101 Name string
102@@ -143,20 +155,20 @@ type VarDeclNode struct {
103 IsDefine bool
104 }
105 type MultiVarDeclNode struct{ Decls []*Node }
106-type TypeDeclNode struct{ Name string; Type *BxType }
107-type EnumDeclNode struct{ Name string; Members []*Node }
108-type ExtrnDeclNode struct{ Names []*Node }
109+type TypeDeclNode struct { Name string; Type *BxType }
110+type EnumDeclNode struct { Name string; Members []*Node }
111+type ExtrnDeclNode struct { Names []*Node; ReturnType *BxType }
112 type IfNode struct{ Cond, ThenBody, ElseBody *Node }
113 type WhileNode struct{ Cond, Body *Node }
114 type ReturnNode struct{ Expr *Node }
115-type BlockNode struct{ Stmts []*Node; IsSynthetic bool }
116+type BlockNode struct { Stmts []*Node; IsSynthetic bool }
117 type GotoNode struct{ Label string }
118 type SwitchNode struct{ Expr, Body *Node }
119-type CaseNode struct{ Values []*Node; Body *Node }
120+type CaseNode struct { Values []*Node; Body *Node }
121 type DefaultNode struct{ Body *Node }
122 type BreakNode struct{}
123 type ContinueNode struct{}
124-type LabelNode struct{ Name string; Stmt *Node }
125+type LabelNode struct { Name string; Stmt *Node }
126 type AsmStmtNode struct{ Code string }
127 type DirectiveNode struct{ Name string }
128
129@@ -170,20 +182,28 @@ func newNode(tok token.Token, nodeType NodeType, data interface{}, children ...*
130
131 func NewNumber(tok token.Token, value int64) *Node {
132 node := newNode(tok, Number, NumberNode{Value: value})
133- node.Typ = TypeUntypedInt
134+ node.Typ = TypeLiteralInt
135 return node
136 }
137 func NewFloatNumber(tok token.Token, value float64) *Node {
138 node := newNode(tok, FloatNumber, FloatNumberNode{Value: value})
139- node.Typ = TypeUntypedFloat
140+ node.Typ = TypeLiteralFloat
141 return node
142 }
143-func NewString(tok token.Token, value string) *Node { return newNode(tok, String, StringNode{Value: value}) }
144-func NewNil(tok token.Token) *Node { return newNode(tok, Nil, NilNode{}) }
145-func NewIdent(tok token.Token, name string) *Node { return newNode(tok, Ident, IdentNode{Name: name}) }
146+func NewString(tok token.Token, value string) *Node {
147+ return newNode(tok, String, StringNode{Value: value})
148+}
149+func NewNil(tok token.Token) *Node { return newNode(tok, Nil, NilNode{}) }
150+func NewIdent(tok token.Token, name string) *Node { return newNode(tok, Ident, IdentNode{Name: name}) }
151 func NewAssign(tok token.Token, op token.Type, lhs, rhs *Node) *Node {
152 return newNode(tok, Assign, AssignNode{Op: op, Lhs: lhs, Rhs: rhs}, lhs, rhs)
153 }
154+func NewMultiAssign(tok token.Token, op token.Type, lhs, rhs []*Node) *Node {
155+ var allChildren []*Node
156+ allChildren = append(allChildren, lhs...)
157+ allChildren = append(allChildren, rhs...)
158+ return newNode(tok, MultiAssign, MultiAssignNode{Op: op, Lhs: lhs, Rhs: rhs}, allChildren...)
159+}
160 func NewBinaryOp(tok token.Token, op token.Type, left, right *Node) *Node {
161 return newNode(tok, BinaryOp, BinaryOpNode{Op: op, Left: left, Right: right}, left, right)
162 }
163@@ -211,6 +231,9 @@ func NewMemberAccess(tok token.Token, expr, member *Node) *Node {
164 func NewTypeCast(tok token.Token, expr *Node, targetType *BxType) *Node {
165 return newNode(tok, TypeCast, TypeCastNode{Expr: expr, TargetType: targetType}, expr)
166 }
167+func NewTypeOf(tok token.Token, expr *Node) *Node {
168+ return newNode(tok, TypeOf, TypeOfNode{Expr: expr}, expr)
169+}
170 func NewStructLiteral(tok token.Token, typeNode *Node, values []*Node, names []*Node) *Node {
171 node := newNode(tok, StructLiteral, StructLiteralNode{TypeNode: typeNode, Values: values, Names: names}, typeNode)
172 for _, v := range values {
173@@ -221,6 +244,13 @@ func NewStructLiteral(tok token.Token, typeNode *Node, values []*Node, names []*
174 }
175 return node
176 }
177+func NewArrayLiteral(tok token.Token, elementType *BxType, values []*Node) *Node {
178+ node := newNode(tok, ArrayLiteral, ArrayLiteralNode{ElementType: elementType, Values: values})
179+ for _, v := range values {
180+ v.Parent = node
181+ }
182+ return node
183+}
184 func NewFuncCall(tok token.Token, funcExpr *Node, args []*Node) *Node {
185 node := newNode(tok, FuncCall, FuncCallNode{FuncExpr: funcExpr, Args: args}, funcExpr)
186 for _, arg := range args {
187@@ -266,8 +296,8 @@ func NewEnumDecl(tok token.Token, name string, members []*Node) *Node {
188 }
189 return node
190 }
191-func NewExtrnDecl(tok token.Token, names []*Node) *Node {
192- node := newNode(tok, ExtrnDecl, ExtrnDeclNode{Names: names})
193+func NewExtrnDecl(tok token.Token, names []*Node, returnType *BxType) *Node {
194+ node := newNode(tok, ExtrnDecl, ExtrnDeclNode{Names: names, ReturnType: returnType})
195 for _, n := range names {
196 n.Parent = node
197 }
198@@ -318,19 +348,19 @@ func NewDirective(tok token.Token, name string) *Node {
199 }
200
201 func FoldConstants(node *Node) *Node {
202- if node == nil { return nil }
203+ if node == nil {
204+ return nil
205+ }
206
207 switch d := node.Data.(type) {
208- case AssignNode:
209- d.Rhs = FoldConstants(d.Rhs)
210- node.Data = d
211- case BinaryOpNode:
212- d.Left = FoldConstants(d.Left)
213- d.Right = FoldConstants(d.Right)
214- node.Data = d
215- case UnaryOpNode:
216- d.Expr = FoldConstants(d.Expr)
217+ case AssignNode: d.Rhs = FoldConstants(d.Rhs); node.Data = d
218+ case MultiAssignNode:
219+ for i, rhs := range d.Rhs {
220+ d.Rhs[i] = FoldConstants(rhs)
221+ }
222 node.Data = d
223+ case BinaryOpNode: d.Left = FoldConstants(d.Left); d.Right = FoldConstants(d.Right); node.Data = d
224+ case UnaryOpNode: d.Expr = FoldConstants(d.Expr); node.Data = d
225 case TernaryNode:
226 d.Cond = FoldConstants(d.Cond)
227 if d.Cond.Type == Number {
228@@ -367,14 +397,21 @@ func FoldConstants(node *Node) *Node {
229 case token.Lte: if l <= r { res = 1 }
230 case token.Gte: if l >= r { res = 1 }
231 case token.Slash:
232- if r == 0 { util.Error(node.Tok, "Compile-time division by zero") }
233+ if r == 0 {
234+ util.Error(node.Tok, "Compile-time division by zero")
235+ }
236 res = l / r
237 case token.Rem:
238- if r == 0 { util.Error(node.Tok, "Compile-time modulo by zero") }
239+ if r == 0 {
240+ util.Error(node.Tok, "Compile-time modulo by zero")
241+ }
242 res = l % r
243- default: folded = false
244+ default:
245+ folded = false
246+ }
247+ if folded {
248+ return NewNumber(node.Tok, res)
249 }
250- if folded { return NewNumber(node.Tok, res) }
251 }
252 case UnaryOp:
253 d := node.Data.(UnaryOpNode)
254@@ -388,8 +425,56 @@ func FoldConstants(node *Node) *Node {
255 case token.Not: if val == 0 { res = 1 }
256 default: folded = false
257 }
258- if folded { return NewNumber(node.Tok, res) }
259+ if folded {
260+ return NewNumber(node.Tok, res)
261+ }
262 }
263 }
264 return node
265 }
266+
267+// TypeToString converts a BxType to its string representation
268+func TypeToString(t *BxType) string {
269+ if t == nil {
270+ return "<nil>"
271+ }
272+ var sb strings.Builder
273+ if t.IsConst {
274+ sb.WriteString("const ")
275+ }
276+ switch t.Kind {
277+ case TYPE_PRIMITIVE, TYPE_BOOL, TYPE_FLOAT, TYPE_LITERAL_INT, TYPE_LITERAL_FLOAT:
278+ sb.WriteString(t.Name)
279+ case TYPE_POINTER:
280+ sb.WriteString("*")
281+ sb.WriteString(TypeToString(t.Base))
282+ case TYPE_ARRAY:
283+ sb.WriteString("[]")
284+ sb.WriteString(TypeToString(t.Base))
285+ case TYPE_STRUCT:
286+ sb.WriteString("struct ")
287+ if t.Name != "" {
288+ sb.WriteString(t.Name)
289+ } else if t.StructTag != "" {
290+ sb.WriteString(t.StructTag)
291+ } else {
292+ sb.WriteString("<anonymous>")
293+ }
294+ case TYPE_ENUM:
295+ sb.WriteString("enum ")
296+ if t.Name != "" {
297+ sb.WriteString(t.Name)
298+ } else {
299+ sb.WriteString("<anonymous>")
300+ }
301+ case TYPE_VOID:
302+ sb.WriteString("void")
303+ case TYPE_UNTYPED:
304+ sb.WriteString("untyped")
305+ case TYPE_NIL:
306+ sb.WriteString("nil")
307+ default:
308+ sb.WriteString(fmt.Sprintf("<unknown_type_kind_%d>", t.Kind))
309+ }
310+ return sb.String()
311+}
+16,
-37
1@@ -1,4 +1,3 @@
2-// package cli is ugly and tries but fails miserably at being a general-purpose, usable CLi library
3 package cli
4
5 import (
6@@ -12,11 +11,7 @@ import (
7 "golang.org/x/term"
8 )
9
10-// IndentState manages hierarchical indentation levels
11-type IndentState struct {
12- levels []uint8
13- baseUnit uint8
14-}
15+type IndentState struct { levels []uint8; baseUnit uint8 }
16
17 func NewIndentState() *IndentState {
18 return &IndentState{
19@@ -45,7 +40,6 @@ func (is *IndentState) AtLevel(level int) string {
20 return strings.Repeat(" ", int(is.baseUnit*uint8(level)))
21 }
22
23-// Value is the interface to the dynamic value stored in a flag
24 type Value interface {
25 String() string
26 Set(string) error
27@@ -54,16 +48,15 @@ type Value interface {
28
29 type stringValue struct{ p *string }
30
31-func (v *stringValue) Set(s string) error { *v.p = s; return nil }
32-func (v *stringValue) String() string { return *v.p }
33-func (v *stringValue) Get() any { return *v.p }
34+func (v *stringValue) Set(s string) error { *v.p = s; return nil }
35+func (v *stringValue) String() string { return *v.p }
36+func (v *stringValue) Get() any { return *v.p }
37 func newStringValue(p *string) *stringValue { return &stringValue{p} }
38
39 type boolValue struct{ p *bool }
40
41 func (v *boolValue) Set(s string) error {
42 val, err := strconv.ParseBool(s)
43- // Allow setting a bool flag without a value, e.g., --verbose
44 if err != nil && s != "" {
45 return fmt.Errorf("invalid boolean value '%s': %w", s, err)
46 }
47@@ -78,9 +71,9 @@ func newBoolValue(p *bool) *boolValue {
48
49 type listValue struct{ p *[]string }
50
51-func (v *listValue) Set(s string) error { *v.p = append(*v.p, s); return nil }
52-func (v *listValue) String() string { return strings.Join(*v.p, ", ") }
53-func (v *listValue) Get() any { return *v.p }
54+func (v *listValue) Set(s string) error { *v.p = append(*v.p, s); return nil }
55+func (v *listValue) String() string { return strings.Join(*v.p, ", ") }
56+func (v *listValue) Get() any { return *v.p }
57 func newListValue(p *[]string) *listValue { return &listValue{p} }
58
59 type Flag struct {
60@@ -89,21 +82,20 @@ type Flag struct {
61 Usage string
62 Value Value
63 DefValue string
64- ExpectedType string // Type placeholder for non-boolean flags (e.g., "<file>")
65+ ExpectedType string
66 }
67
68-// FlagGroup is a collection of related flags, like feature or warning flags
69 type FlagGroup struct {
70 Name string
71 Description string
72 Flags []FlagGroupEntry
73- GroupType string // e.g., "warning flag", "feature flag"
74- AvailableFlagsHeader string // e.g., "Available Warning Flags:"
75+ GroupType string
76+ AvailableFlagsHeader string
77 }
78
79 type FlagGroupEntry struct {
80- Name string // Name without prefix (e.g., "all" for "-Wall")
81- Prefix string // Group prefix (e.g., "W", "F")
82+ Name string
83+ Prefix string
84 Usage string
85 Enabled *bool
86 Disabled *bool
87@@ -139,20 +131,17 @@ func (f *FlagSet) Bool(p *bool, name, shorthand string, value bool, usage string
88 f.Var(newBoolValue(p), name, shorthand, usage, strconv.FormatBool(value), "")
89 }
90
91-// List defines a flag that can be specified multiple times to build a list of strings
92 func (f *FlagSet) List(p *[]string, name, shorthand string, value []string, usage, expectedType string) {
93 *p = value
94 f.Var(newListValue(p), name, shorthand, usage, fmt.Sprintf("%v", value), expectedType)
95 }
96
97-// Special defines a flag with a prefix that captures the value directly, like -lm for library 'm'
98 func (f *FlagSet) Special(p *[]string, prefix, usage, expectedType string) {
99 *p = []string{}
100 f.Var(newListValue(p), prefix, "", usage, "", expectedType)
101 f.specialPrefix[prefix] = f.flags[prefix]
102 }
103
104-// DefineGroupFlags registers the enable/disable flags for a set of flag group entries.
105 func (f *FlagSet) DefineGroupFlags(entries []FlagGroupEntry) {
106 for i := range entries {
107 if entries[i].Enabled != nil {
108@@ -210,7 +199,6 @@ func (f *FlagSet) Parse(arguments []string) error {
109 return err
110 }
111 } else {
112- // Check if it's a long option with a single dash, e.g., -std=b or -pedantic
113 name := arg[1:]
114 if strings.Contains(name, "=") {
115 name = strings.SplitN(name, "=", 2)[0]
116@@ -218,7 +206,6 @@ func (f *FlagSet) Parse(arguments []string) error {
117
118 flag, ok := f.flags[name]
119 if ok {
120- // It's a long flag with a single dash. Parse it.
121 parts := strings.SplitN(arg[1:], "=", 2)
122 if len(parts) == 2 {
123 if err := flag.Value.Set(parts[1]); err != nil {
124@@ -240,7 +227,6 @@ func (f *FlagSet) Parse(arguments []string) error {
125 }
126 }
127 } else {
128- // Fallback to original short flag parsing
129 if err := f.parseShortFlag(arg, arguments, &i); err != nil {
130 return err
131 }
132@@ -264,7 +250,7 @@ func (f *FlagSet) parseLongFlag(arg string, arguments []string, i *int) error {
133 return flag.Value.Set(parts[1])
134 }
135 if _, isBool := flag.Value.(*boolValue); isBool {
136- return flag.Value.Set("") // E.g., --verbose
137+ return flag.Value.Set("")
138 }
139 if *i+1 >= len(arguments) {
140 return fmt.Errorf("flag needs an argument: --%s", name)
141@@ -274,7 +260,6 @@ func (f *FlagSet) parseLongFlag(arg string, arguments []string, i *int) error {
142 }
143
144 func (f *FlagSet) parseShortFlag(arg string, arguments []string, i *int) error {
145- // Handle special prefix flags like -I/path/to/include or -lm
146 for prefix, flag := range f.specialPrefix {
147 if strings.HasPrefix(arg, "-"+prefix) && len(arg) > len(prefix)+1 {
148 return flag.Value.Set(arg[len(prefix)+1:])
149@@ -287,9 +272,8 @@ func (f *FlagSet) parseShortFlag(arg string, arguments []string, i *int) error {
150 return fmt.Errorf("unknown shorthand flag: -%s", shorthand)
151 }
152 if _, isBool := flag.Value.(*boolValue); isBool {
153- return flag.Value.Set("") // E.g., -h
154+ return flag.Value.Set("")
155 }
156- // assume -o <val> and not combined short flags
157 value := arg[2:]
158 if value == "" {
159 if *i+1 >= len(arguments) {
160@@ -347,12 +331,10 @@ func (a *App) generateUsagePage(w *os.File) {
161 termWidth := getTerminalWidth()
162 indent := NewIndentState()
163
164- // Use [] for mandatory and <> for optional as requested
165 fmt.Fprintf(&sb, "Usage: %s <options> [input.b] ...\n", a.Name)
166
167 optionFlags := a.getOptionFlags()
168 if len(optionFlags) > 0 {
169- // Calculate max widths for alignment within the options section
170 maxFlagWidth := 0
171 maxUsageWidth := 0
172 for _, flag := range optionFlags {
173@@ -385,14 +367,13 @@ func (a *App) generateHelpPage(w *os.File) {
174
175 globalMaxWidth := a.calculateGlobalMaxWidth()
176
177- // Calculate the maximum usage string width across all flag sections for alignment
178 globalMaxUsageWidth := 0
179 updateMaxUsage := func(s string) {
180 if len(s) > globalMaxUsageWidth {
181 globalMaxUsageWidth = len(s)
182 }
183 }
184- optionFlags := a.getOptionFlags() // Get once to reuse
185+ optionFlags := a.getOptionFlags()
186 for _, flag := range optionFlags {
187 updateMaxUsage(flag.Usage)
188 }
189@@ -412,7 +393,6 @@ func (a *App) generateHelpPage(w *os.File) {
190 if a.Synopsis != "" {
191 sb.WriteString("\n")
192 fmt.Fprintf(&sb, "%sSynopsis\n", indent.AtLevel(1))
193- // Use [] for mandatory and <> for optional as requested for the synopsis
194 synopsis := strings.ReplaceAll(a.Synopsis, "[", "<")
195 synopsis = strings.ReplaceAll(synopsis, "]", ">")
196 fmt.Fprintf(&sb, "%s%s %s\n", indent.AtLevel(2), a.Name, synopsis)
197@@ -507,7 +487,6 @@ func (a *App) formatFlagString(flag *Flag) string {
198 } else {
199 fmt.Fprintf(&flagStr, "--%s", flag.Name)
200 if !isBool {
201- // Use equals for long flags that take a value for clarity
202 if flag.ExpectedType != "" {
203 fmt.Fprintf(&flagStr, "=%s", flag.ExpectedType)
204 }
205@@ -583,7 +562,7 @@ func (a *App) formatFlagGroup(sb *strings.Builder, group FlagGroup, indent *Inde
206 prefix := group.Flags[0].Prefix
207 groupType := group.GroupType
208 if groupType == "" {
209- groupType = "flag" // Default value if not provided
210+ groupType = "flag"
211 }
212
213 fmt.Fprintf(sb, "%s%-*s Enable a specific %s\n", indent.AtLevel(2), globalMaxWidth, fmt.Sprintf("-%s<%s>", prefix, groupType), groupType)
+1,
-0
1@@ -9,4 +9,5 @@ import (
2 // Backend is an interface for code generation backends
3 type Backend interface {
4 Generate(prog *ir.Program, cfg *config.Config) (*bytes.Buffer, error)
5+ GenerateIR(prog *ir.Program, cfg *config.Config) (string, error)
6 }
+186,
-137
1@@ -32,15 +32,9 @@ type symbol struct {
2 Node *ast.Node
3 }
4
5-type scope struct {
6- Symbols *symbol
7- Parent *scope
8-}
9+type scope struct{ Symbols *symbol; Parent *scope }
10
11-type autoVarInfo struct {
12- Node *ast.Node
13- Size int64
14-}
15+type autoVarInfo struct{ Node *ast.Node; Size int64 }
16
17 type Context struct {
18 prog *ir.Program
19@@ -81,38 +75,29 @@ func newScope(parent *scope) *scope { return &scope{Parent: parent} }
20
21 func (ctx *Context) enterScope() { ctx.currentScope = newScope(ctx.currentScope) }
22 func (ctx *Context) exitScope() {
23- if ctx.currentScope.Parent != nil {
24- ctx.currentScope = ctx.currentScope.Parent
25- }
26+ if ctx.currentScope.Parent != nil { ctx.currentScope = ctx.currentScope.Parent }
27 }
28
29 func (ctx *Context) findSymbol(name string) *symbol {
30- for s := ctx.currentScope; s != nil; s = s.Parent {
31- for sym := s.Symbols; sym != nil; sym = sym.Next {
32- if sym.Name == name && sym.Type != symType {
33- return sym
34- }
35- }
36- }
37- return nil
38+ return ctx.findSymbolOfType(name, -1, false) // -1 means any type except symType
39 }
40
41 func (ctx *Context) findTypeSymbol(name string) *symbol {
42- for s := ctx.currentScope; s != nil; s = s.Parent {
43- for sym := s.Symbols; sym != nil; sym = sym.Next {
44- if sym.Name == name && sym.Type == symType {
45- return sym
46- }
47- }
48- }
49- return nil
50+ return ctx.findSymbolOfType(name, symType, false)
51 }
52
53 func (ctx *Context) findSymbolInCurrentScope(name string) *symbol {
54- for sym := ctx.currentScope.Symbols; sym != nil; sym = sym.Next {
55- if sym.Name == name {
56- return sym
57+ return ctx.findSymbolOfType(name, -1, true) // any type, current scope only
58+}
59+
60+func (ctx *Context) findSymbolOfType(name string, wantType symbolType, currentOnly bool) *symbol {
61+ for s := ctx.currentScope; s != nil; s = s.Parent {
62+ for sym := s.Symbols; sym != nil; sym = sym.Next {
63+ if sym.Name == name {
64+ if wantType == -1 || (wantType == symType) == (sym.Type == symType) { return sym }
65+ }
66 }
67+ if currentOnly { break }
68 }
69 return nil
70 }
71@@ -129,10 +114,8 @@ func (ctx *Context) addSymbol(name string, symType symbolType, bxType *ast.BxTyp
72 t.Name = name
73 }
74 }
75- case symFunc, symExtrn:
76- irVal = &ir.Global{Name: name}
77- case symLabel:
78- irVal = &ir.Label{Name: name}
79+ case symFunc, symExtrn: irVal = &ir.Global{Name: name}
80+ case symLabel: irVal = &ir.Label{Name: name}
81 }
82
83 sym := &symbol{
84@@ -162,38 +145,28 @@ func (ctx *Context) startBlock(label *ir.Label) {
85 }
86
87 func (ctx *Context) addInstr(instr *ir.Instruction) {
88- if ctx.currentBlock == nil {
89- ctx.startBlock(ctx.newLabel())
90- }
91+ if ctx.currentBlock == nil { ctx.startBlock(ctx.newLabel()) }
92 ctx.currentBlock.Instructions = append(ctx.currentBlock.Instructions, instr)
93 }
94
95 func (ctx *Context) addString(value string) ir.Value {
96- if label, ok := ctx.prog.Strings[value]; ok {
97- return &ir.Global{Name: label}
98- }
99+ if label, ok := ctx.prog.Strings[value]; ok { return &ir.Global{Name: label} }
100 label := fmt.Sprintf("str%d", len(ctx.prog.Strings))
101 ctx.prog.Strings[value] = label
102 return &ir.Global{Name: label}
103 }
104
105 func (ctx *Context) evalConstExpr(node *ast.Node) (int64, bool) {
106- if node == nil {
107- return 0, false
108- }
109+ if node == nil { return 0, false }
110 folded := ast.FoldConstants(node)
111- if folded.Type == ast.Number {
112- return folded.Data.(ast.NumberNode).Value, true
113- }
114+ if folded.Type == ast.Number { return folded.Data.(ast.NumberNode).Value, true }
115 if folded.Type == ast.Ident {
116 identName := folded.Data.(ast.IdentNode).Name
117 sym := ctx.findSymbol(identName)
118 if sym != nil && sym.Node != nil && sym.Node.Type == ast.VarDecl {
119 decl := sym.Node.Data.(ast.VarDeclNode)
120 if len(decl.InitList) == 1 {
121- if decl.InitList[0] == node {
122- return 0, false
123- }
124+ if decl.InitList[0] == node { return 0, false }
125 return ctx.evalConstExpr(decl.InitList[0])
126 }
127 }
128@@ -202,14 +175,11 @@ func (ctx *Context) evalConstExpr(node *ast.Node) (int64, bool) {
129 }
130
131 func (ctx *Context) getSizeof(typ *ast.BxType) int64 {
132- if typ == nil || typ.Kind == ast.TYPE_UNTYPED {
133- return int64(ctx.wordSize)
134- }
135+ if typ == nil || typ.Kind == ast.TYPE_UNTYPED { return int64(ctx.wordSize) }
136+
137 switch typ.Kind {
138- case ast.TYPE_VOID:
139- return 0
140- case ast.TYPE_POINTER:
141- return int64(ctx.wordSize)
142+ case ast.TYPE_VOID: return 0
143+ case ast.TYPE_POINTER: return int64(ctx.wordSize)
144 case ast.TYPE_ARRAY:
145 elemSize := ctx.getSizeof(typ.Base)
146 var arrayLen int64 = 1
147@@ -221,35 +191,24 @@ func (ctx *Context) getSizeof(typ *ast.BxType) int64 {
148 }
149 }
150 return elemSize * arrayLen
151- case ast.TYPE_PRIMITIVE, ast.TYPE_UNTYPED_INT:
152- switch typ.Name {
153- case "int", "uint", "string":
154- return int64(ctx.wordSize)
155- case "int64", "uint64":
156- return 8
157- case "int32", "uint32":
158- return 4
159- case "int16", "uint16":
160- return 2
161- case "byte", "bool", "int8", "uint8":
162- return 1
163- default:
164- if sym := ctx.findTypeSymbol(typ.Name); sym != nil {
165- return ctx.getSizeof(sym.BxType)
166- }
167- return int64(ctx.wordSize)
168+ case ast.TYPE_PRIMITIVE, ast.TYPE_LITERAL_INT:
169+ resolver := ir.NewTypeSizeResolver(ctx.wordSize)
170+ if size := resolver.GetTypeSize(typ.Name); size > 0 {
171+ return size
172+ }
173+ // Fallback for user-defined types
174+ if sym := ctx.findTypeSymbol(typ.Name); sym != nil {
175+ return ctx.getSizeof(sym.BxType)
176 }
177+ return int64(ctx.wordSize)
178 case ast.TYPE_ENUM:
179 return ctx.getSizeof(ast.TypeInt)
180- case ast.TYPE_FLOAT, ast.TYPE_UNTYPED_FLOAT:
181- switch typ.Name {
182- case "float", "float32":
183- return 4
184- case "float64":
185- return 8
186- default:
187- return 4
188+ case ast.TYPE_FLOAT, ast.TYPE_LITERAL_FLOAT:
189+ if typ.Kind == ast.TYPE_LITERAL_FLOAT {
190+ return int64(ctx.wordSize)
191 }
192+ resolver := ir.NewTypeSizeResolver(ctx.wordSize)
193+ return resolver.GetTypeSize(typ.Name)
194 case ast.TYPE_STRUCT:
195 var totalSize, maxAlign int64 = 0, 1
196 for _, field := range typ.Fields {
197@@ -270,37 +229,25 @@ func (ctx *Context) getSizeof(typ *ast.BxType) int64 {
198 }
199
200 func (ctx *Context) getAlignof(typ *ast.BxType) int64 {
201- if typ == nil {
202- return int64(ctx.wordSize)
203- }
204+ if typ == nil { return int64(ctx.wordSize) }
205
206 if (typ.Kind == ast.TYPE_PRIMITIVE || typ.Kind == ast.TYPE_STRUCT) && typ.Name != "" {
207 if sym := ctx.findTypeSymbol(typ.Name); sym != nil {
208- if sym.BxType != typ {
209- return ctx.getAlignof(sym.BxType)
210- }
211+ if sym.BxType != typ { return ctx.getAlignof(sym.BxType) }
212 }
213 }
214
215- if typ.Kind == ast.TYPE_UNTYPED {
216- return int64(ctx.wordSize)
217- }
218+ if typ.Kind == ast.TYPE_UNTYPED { return int64(ctx.wordSize) }
219 switch typ.Kind {
220- case ast.TYPE_VOID:
221- return 1
222- case ast.TYPE_POINTER:
223- return int64(ctx.wordSize)
224- case ast.TYPE_ARRAY:
225- return ctx.getAlignof(typ.Base)
226- case ast.TYPE_PRIMITIVE, ast.TYPE_FLOAT, ast.TYPE_ENUM, ast.TYPE_UNTYPED_INT, ast.TYPE_UNTYPED_FLOAT:
227- return ctx.getSizeof(typ)
228+ case ast.TYPE_VOID: return 1
229+ case ast.TYPE_POINTER: return int64(ctx.wordSize)
230+ case ast.TYPE_ARRAY: return ctx.getAlignof(typ.Base)
231+ case ast.TYPE_PRIMITIVE, ast.TYPE_FLOAT, ast.TYPE_ENUM, ast.TYPE_LITERAL_INT, ast.TYPE_LITERAL_FLOAT: return ctx.getSizeof(typ)
232 case ast.TYPE_STRUCT:
233 var maxAlign int64 = 1
234 for _, field := range typ.Fields {
235 fieldAlign := ctx.getAlignof(field.Data.(ast.VarDeclNode).Type)
236- if fieldAlign > maxAlign {
237- maxAlign = fieldAlign
238- }
239+ if fieldAlign > maxAlign { maxAlign = fieldAlign }
240 }
241 return maxAlign
242 }
243@@ -320,9 +267,7 @@ func (ctx *Context) GenerateIR(root *ast.Node) (*ir.Program, string) {
244 }
245
246 func walkAST(node *ast.Node, visitor func(n *ast.Node)) {
247- if node == nil {
248- return
249- }
250+ if node == nil { return }
251 visitor(node)
252
253 switch d := node.Data.(type) {
254@@ -392,7 +337,9 @@ func walkAST(node *ast.Node, visitor func(n *ast.Node)) {
255 }
256
257 func (ctx *Context) collectGlobals(node *ast.Node) {
258- if node == nil { return }
259+ if node == nil {
260+ return
261+ }
262
263 switch node.Type {
264 case ast.Block:
265@@ -468,7 +415,9 @@ func (ctx *Context) findByteArrays(root *ast.Node) {
266 for {
267 changedInPass := false
268 visitor := func(n *ast.Node) {
269- if n == nil { return }
270+ if n == nil {
271+ return
272+ }
273 switch n.Type {
274 case ast.VarDecl:
275 d := n.Data.(ast.VarDeclNode)
276@@ -480,9 +429,13 @@ func (ctx *Context) findByteArrays(root *ast.Node) {
277 }
278 case ast.Assign:
279 d := n.Data.(ast.AssignNode)
280- if d.Lhs.Type != ast.Ident { return }
281+ if d.Lhs.Type != ast.Ident {
282+ return
283+ }
284 lhsSym := ctx.findSymbol(d.Lhs.Data.(ast.IdentNode).Name)
285- if lhsSym == nil || lhsSym.IsByteArray { return }
286+ if lhsSym == nil || lhsSym.IsByteArray {
287+ return
288+ }
289 rhsIsByteArray := false
290 switch d.Rhs.Type {
291 case ast.String:
292@@ -499,7 +452,9 @@ func (ctx *Context) findByteArrays(root *ast.Node) {
293 }
294 }
295 walkAST(root, visitor)
296- if !changedInPass { break }
297+ if !changedInPass {
298+ break
299+ }
300 }
301 }
302
303@@ -544,11 +499,43 @@ func (ctx *Context) codegenMemberAccessAddr(node *ast.Node) ir.Value {
304 if structType.Kind == ast.TYPE_POINTER {
305 structAddr, _ = ctx.codegenExpr(d.Expr)
306 } else {
307- structAddr = ctx.codegenLvalue(d.Expr)
308+ // Check if this is a struct parameter (which is passed as pointer)
309+ if d.Expr.Type == ast.Ident {
310+ name := d.Expr.Data.(ast.IdentNode).Name
311+ if sym := ctx.findSymbol(name); sym != nil {
312+ // Check if this is a function parameter that has struct type
313+ isStructParam := false
314+ if sym.Node != nil && sym.Node.Parent != nil && sym.Node.Parent.Type == ast.FuncDecl {
315+ // Resolve the struct type
316+ paramStructType := structType
317+ if paramStructType != nil && paramStructType.Kind != ast.TYPE_STRUCT && paramStructType.Name != "" {
318+ if typeSym := ctx.findTypeSymbol(paramStructType.Name); typeSym != nil && typeSym.BxType.Kind == ast.TYPE_STRUCT {
319+ paramStructType = typeSym.BxType
320+ }
321+ }
322+ if paramStructType != nil && paramStructType.Kind == ast.TYPE_STRUCT {
323+ isStructParam = true
324+ }
325+ }
326+
327+ if isStructParam {
328+ // For struct parameters, use the parameter value directly (it's already a pointer)
329+ structAddr, _ = ctx.codegenExpr(d.Expr)
330+ } else {
331+ structAddr = ctx.codegenLvalue(d.Expr)
332+ }
333+ } else {
334+ structAddr = ctx.codegenLvalue(d.Expr)
335+ }
336+ } else {
337+ structAddr = ctx.codegenLvalue(d.Expr)
338+ }
339 }
340
341 baseType := structType
342- if baseType.Kind == ast.TYPE_POINTER { baseType = baseType.Base }
343+ if baseType.Kind == ast.TYPE_POINTER {
344+ baseType = baseType.Base
345+ }
346
347 if baseType.Kind != ast.TYPE_STRUCT && baseType.Name != "" {
348 if sym := ctx.findTypeSymbol(baseType.Name); sym != nil && sym.BxType.Kind == ast.TYPE_STRUCT {
349@@ -564,10 +551,13 @@ func (ctx *Context) codegenMemberAccessAddr(node *ast.Node) ir.Value {
350 var offset int64
351 found := false
352 memberName := d.Member.Data.(ast.IdentNode).Name
353+
354 for _, fieldNode := range baseType.Fields {
355 fieldData := fieldNode.Data.(ast.VarDeclNode)
356 fieldAlign := ctx.getAlignof(fieldData.Type)
357+
358 offset = util.AlignUp(offset, fieldAlign)
359+
360 if fieldData.Name == memberName {
361 found = true
362 break
363@@ -580,7 +570,9 @@ func (ctx *Context) codegenMemberAccessAddr(node *ast.Node) ir.Value {
364 return nil
365 }
366
367- if offset == 0 { return structAddr }
368+ if offset == 0 {
369+ return structAddr
370+ }
371
372 resultAddr := ctx.newTemp()
373 ctx.addInstr(&ir.Instruction{
374@@ -652,7 +644,9 @@ func (ctx *Context) codegenLogicalCond(node *ast.Node, trueL, falseL *ir.Label)
375 }
376
377 func (ctx *Context) codegenExpr(node *ast.Node) (result ir.Value, terminates bool) {
378- if node == nil { return &ir.Const{Value: 0}, false }
379+ if node == nil {
380+ return &ir.Const{Value: 0}, false
381+ }
382
383 switch node.Type {
384 case ast.Number:
385@@ -668,6 +662,8 @@ func (ctx *Context) codegenExpr(node *ast.Node) (result ir.Value, terminates boo
386 return ctx.codegenIdent(node)
387 case ast.Assign:
388 return ctx.codegenAssign(node)
389+ case ast.MultiAssign:
390+ return ctx.codegenMultiAssign(node)
391 case ast.BinaryOp:
392 return ctx.codegenBinaryOp(node)
393 case ast.UnaryOp:
394@@ -685,15 +681,21 @@ func (ctx *Context) codegenExpr(node *ast.Node) (result ir.Value, terminates boo
395 return ctx.codegenFuncCall(node)
396 case ast.TypeCast:
397 return ctx.codegenTypeCast(node)
398+ case ast.TypeOf:
399+ return ctx.codegenTypeOf(node)
400 case ast.Ternary:
401 return ctx.codegenTernary(node)
402 case ast.AutoAlloc:
403 return ctx.codegenAutoAlloc(node)
404 case ast.StructLiteral:
405 return ctx.codegenStructLiteral(node)
406+ case ast.ArrayLiteral:
407+ return ctx.codegenArrayLiteral(node)
408 case ast.MemberAccess:
409 addr := ctx.codegenMemberAccessAddr(node)
410- if addr == nil { return nil, true }
411+ if addr == nil {
412+ return nil, true
413+ }
414 return ctx.genLoad(addr, node.Typ), false
415 }
416 util.Error(node.Tok, "Internal error: unhandled expression type in codegen: %v", node.Type)
417@@ -701,11 +703,15 @@ func (ctx *Context) codegenExpr(node *ast.Node) (result ir.Value, terminates boo
418 }
419
420 func (ctx *Context) codegenStmt(node *ast.Node) (terminates bool) {
421- if node == nil { return false }
422+ if node == nil {
423+ return false
424+ }
425 switch node.Type {
426 case ast.Block:
427 isRealBlock := !node.Data.(ast.BlockNode).IsSynthetic
428- if isRealBlock { ctx.enterScope() }
429+ if isRealBlock {
430+ ctx.enterScope()
431+ }
432 var blockTerminates bool
433 for _, stmt := range node.Data.(ast.BlockNode).Stmts {
434 if blockTerminates {
435@@ -719,7 +725,9 @@ func (ctx *Context) codegenStmt(node *ast.Node) (terminates bool) {
436 }
437 blockTerminates = ctx.codegenStmt(stmt)
438 }
439- if isRealBlock { ctx.exitScope() }
440+ if isRealBlock {
441+ ctx.exitScope()
442+ }
443 return blockTerminates
444
445 case ast.FuncDecl:
446@@ -733,7 +741,14 @@ func (ctx *Context) codegenStmt(node *ast.Node) (terminates bool) {
447 ctx.codegenVarDecl(decl)
448 }
449 return false
450- case ast.TypeDecl, ast.Directive, ast.EnumDecl:
451+ case ast.TypeDecl, ast.Directive:
452+ return false
453+ case ast.EnumDecl:
454+ // Process enum members as global variable declarations
455+ d := node.Data.(ast.EnumDeclNode)
456+ for _, memberNode := range d.Members {
457+ ctx.codegenVarDecl(memberNode)
458+ }
459 return false
460 case ast.ExtrnDecl:
461 d := node.Data.(ast.ExtrnDeclNode)
462@@ -768,13 +783,17 @@ func (ctx *Context) codegenStmt(node *ast.Node) (terminates bool) {
463 return true
464
465 case ast.Break:
466- if ctx.breakLabel == nil { util.Error(node.Tok, "'break' not in a loop or switch") }
467+ if ctx.breakLabel == nil {
468+ util.Error(node.Tok, "'break' not in a loop or switch")
469+ }
470 ctx.addInstr(&ir.Instruction{Op: ir.OpJmp, Args: []ir.Value{ctx.breakLabel}})
471 ctx.currentBlock = nil
472 return true
473
474 case ast.Continue:
475- if ctx.continueLabel == nil { util.Error(node.Tok, "'continue' not in a loop") }
476+ if ctx.continueLabel == nil {
477+ util.Error(node.Tok, "'continue' not in a loop")
478+ }
479 ctx.addInstr(&ir.Instruction{Op: ir.OpJmp, Args: []ir.Value{ctx.continueLabel}})
480 ctx.currentBlock = nil
481 return true
482@@ -821,14 +840,18 @@ func (ctx *Context) codegenSwitch(node *ast.Node) bool {
483 var caseOrder []*ast.Node
484 var findCasesRecursive func(*ast.Node)
485 findCasesRecursive = func(n *ast.Node) {
486- if n == nil || (n.Type == ast.Switch && n != node) { return }
487+ if n == nil || (n.Type == ast.Switch && n != node) {
488+ return
489+ }
490 if n.Type == ast.Case || n.Type == ast.Default {
491 if _, exists := caseLabels[n]; !exists {
492 label := ctx.newLabel()
493 caseLabels[n] = label
494 caseOrder = append(caseOrder, n)
495 if n.Type == ast.Default {
496- if defaultTarget != nil { util.Error(n.Tok, "multiple default labels in switch") }
497+ if defaultTarget != nil {
498+ util.Error(n.Tok, "multiple default labels in switch")
499+ }
500 defaultTarget = label
501 }
502 }
503@@ -853,7 +876,9 @@ func (ctx *Context) codegenSwitch(node *ast.Node) bool {
504 }
505 findCasesRecursive(d.Body)
506
507- if defaultTarget == nil { defaultTarget = endLabel }
508+ if defaultTarget == nil {
509+ defaultTarget = endLabel
510+ }
511
512 for _, caseStmt := range caseOrder {
513 if caseStmt.Type == ast.Case {
514@@ -901,7 +926,9 @@ func (ctx *Context) codegenSwitch(node *ast.Node) bool {
515 }
516
517 func (ctx *Context) findAllAutosInFunc(node *ast.Node, autoVars *[]autoVarInfo, definedNames map[string]bool) {
518- if node == nil { return }
519+ if node == nil {
520+ return
521+ }
522 if node.Type == ast.VarDecl {
523 varData := node.Data.(ast.VarDeclNode)
524 if !definedNames[varData.Name] {
525@@ -966,7 +993,9 @@ func (ctx *Context) codegenFuncDecl(node *ast.Node) {
526 ctx.inlineAsm += fmt.Sprintf(".globl %s\n%s:\n\t%s\n", d.Name, d.Name, asmCode)
527 return
528 }
529- if d.Body == nil { return }
530+ if d.Body == nil {
531+ return
532+ }
533
534 irReturnType := ir.GetType(d.ReturnType, ctx.wordSize)
535 fn := &ir.Func{
536@@ -1065,7 +1094,9 @@ func (ctx *Context) codegenFuncDecl(node *ast.Node) {
537 break
538 }
539 }
540- if originalIndex == 1 { isVec = true }
541+ if originalIndex == 1 {
542+ isVec = true
543+ }
544 }
545 } else {
546 varData := local.Node.Data.(ast.VarDeclNode)
547@@ -1126,15 +1157,12 @@ func (ctx *Context) codegenFuncDecl(node *ast.Node) {
548 func (ctx *Context) codegenGlobalConst(node *ast.Node) ir.Value {
549 folded := ast.FoldConstants(node)
550 switch folded.Type {
551- case ast.Number:
552- return &ir.Const{Value: folded.Data.(ast.NumberNode).Value}
553+ case ast.Number: return &ir.Const{Value: folded.Data.(ast.NumberNode).Value}
554 case ast.FloatNumber:
555 typ := ir.GetType(folded.Typ, ctx.wordSize)
556 return &ir.FloatConst{Value: folded.Data.(ast.FloatNumberNode).Value, Typ: typ}
557- case ast.String:
558- return ctx.addString(folded.Data.(ast.StringNode).Value)
559- case ast.Nil:
560- return &ir.Const{Value: 0}
561+ case ast.String: return ctx.addString(folded.Data.(ast.StringNode).Value)
562+ case ast.Nil: return &ir.Const{Value: 0}
563 case ast.Ident:
564 name := folded.Data.(ast.IdentNode).Name
565 sym := ctx.findSymbol(name)
566@@ -1142,6 +1170,10 @@ func (ctx *Context) codegenGlobalConst(node *ast.Node) ir.Value {
567 util.Error(node.Tok, "Undefined symbol '%s' in global initializer", name)
568 return nil
569 }
570+ // Try to evaluate as a constant expression (for enum constants)
571+ if val, ok := ctx.evalConstExpr(folded); ok {
572+ return &ir.Const{Value: val}
573+ }
574 return sym.IRVal
575 case ast.AddressOf:
576 lval := folded.Data.(ast.AddressOfNode).LValue
577@@ -1182,7 +1214,9 @@ func (ctx *Context) codegenVarDecl(node *ast.Node) {
578 }
579
580 func (ctx *Context) codegenLocalVarDecl(d ast.VarDeclNode, sym *symbol) {
581- if len(d.InitList) == 0 { return }
582+ if len(d.InitList) == 0 {
583+ return
584+ }
585
586 if d.IsVector || (d.Type != nil && d.Type.Kind == ast.TYPE_ARRAY) {
587 vectorPtr, _ := ctx.codegenExpr(&ast.Node{Type: ast.Ident, Data: ast.IdentNode{Name: d.Name}, Tok: sym.Node.Tok})
588@@ -1220,7 +1254,16 @@ func (ctx *Context) codegenLocalVarDecl(d ast.VarDeclNode, sym *symbol) {
589 varType = initExpr.Typ
590 }
591
592- if sym.BxType == nil || sym.BxType.Kind == ast.TYPE_UNTYPED { sym.BxType = varType }
593+ if sym.BxType == nil || sym.BxType.Kind == ast.TYPE_UNTYPED {
594+ sym.BxType = varType
595+ }
596+
597+ // Resolve named struct types to their actual definitions
598+ if varType != nil && varType.Kind != ast.TYPE_STRUCT && varType.Name != "" {
599+ if typeSym := ctx.findTypeSymbol(varType.Name); typeSym != nil && typeSym.BxType.Kind == ast.TYPE_STRUCT {
600+ varType = typeSym.BxType
601+ }
602+ }
603
604 if varType != nil && varType.Kind == ast.TYPE_STRUCT {
605 rvalPtr, _ := ctx.codegenExpr(initExpr)
606@@ -1248,7 +1291,9 @@ func (ctx *Context) codegenGlobalVarDecl(d ast.VarDeclNode, sym *symbol) {
607 if structSize > 0 {
608 globalData.Items = append(globalData.Items, ir.DataItem{Typ: ir.TypeB, Count: int(structSize)})
609 }
610- if len(globalData.Items) > 0 { ctx.prog.Globals = append(ctx.prog.Globals, globalData) }
611+ if len(globalData.Items) > 0 {
612+ ctx.prog.Globals = append(ctx.prog.Globals, globalData)
613+ }
614 return
615 }
616
617@@ -1290,7 +1335,9 @@ func (ctx *Context) codegenGlobalVarDecl(d ast.VarDeclNode, sym *symbol) {
618 for _, init := range d.InitList {
619 val := ctx.codegenGlobalConst(init)
620 itemType := elemType
621- if _, ok := val.(*ir.Global); ok { itemType = ir.TypePtr }
622+ if _, ok := val.(*ir.Global); ok {
623+ itemType = ir.TypePtr
624+ }
625 globalData.Items = append(globalData.Items, ir.DataItem{Typ: itemType, Value: val})
626 }
627 initializedElements := int64(len(d.InitList))
628@@ -1301,5 +1348,7 @@ func (ctx *Context) codegenGlobalVarDecl(d ast.VarDeclNode, sym *symbol) {
629 globalData.Items = append(globalData.Items, ir.DataItem{Typ: elemType, Count: int(numElements)})
630 }
631
632- if len(globalData.Items) > 0 { ctx.prog.Globals = append(ctx.prog.Globals, globalData) }
633+ if len(globalData.Items) > 0 {
634+ ctx.prog.Globals = append(ctx.prog.Globals, globalData)
635+ }
636 }
+372,
-61
1@@ -19,11 +19,12 @@ func (ctx *Context) codegenIdent(node *ast.Node) (ir.Value, bool) {
2 }
3
4 switch sym.Type {
5- case symFunc:
6- return sym.IRVal, false
7+ case symFunc: return sym.IRVal, false
8 case symExtrn:
9 isCall := node.Parent != nil && node.Parent.Type == ast.FuncCall && node.Parent.Data.(ast.FuncCallNode).FuncExpr == node
10- if isCall { return sym.IRVal, false }
11+ if isCall {
12+ return sym.IRVal, false
13+ }
14 ctx.prog.ExtrnVars[name] = true
15 res := ctx.newTemp()
16 ctx.addInstr(&ir.Instruction{Op: ir.OpLoad, Typ: ir.TypePtr, Result: res, Args: []ir.Value{sym.IRVal}})
17@@ -45,35 +46,71 @@ func (ctx *Context) codegenIdent(node *ast.Node) (ir.Value, bool) {
18 _, isLocal := sym.IRVal.(*ir.Temporary)
19 if isLocal {
20 isDopeVector := sym.IsVector && (sym.BxType == nil || sym.BxType.Kind == ast.TYPE_UNTYPED)
21- if isParam || isDopeVector { return ctx.genLoad(sym.IRVal, sym.BxType), false }
22+ if isParam || isDopeVector {
23+ return ctx.genLoad(sym.IRVal, sym.BxType), false
24+ }
25 }
26 return sym.IRVal, false
27 }
28
29- if sym.BxType != nil && sym.BxType.Kind == ast.TYPE_STRUCT { return sym.IRVal, false }
30+ if sym.BxType != nil && sym.BxType.Kind == ast.TYPE_STRUCT {
31+ return sym.IRVal, false
32+ }
33
34 return ctx.genLoad(sym.IRVal, sym.BxType), false
35 }
36
37 func (ctx *Context) isIntegerType(t *ast.BxType) bool {
38- return t != nil && (t.Kind == ast.TYPE_PRIMITIVE || t.Kind == ast.TYPE_UNTYPED_INT)
39+ return t != nil && (t.Kind == ast.TYPE_PRIMITIVE || t.Kind == ast.TYPE_LITERAL_INT || t.Kind == ast.TYPE_ENUM)
40 }
41
42 func (ctx *Context) isFloatType(t *ast.BxType) bool {
43- return t != nil && (t.Kind == ast.TYPE_FLOAT || t.Kind == ast.TYPE_UNTYPED_FLOAT)
44+ return t != nil && (t.Kind == ast.TYPE_FLOAT || t.Kind == ast.TYPE_LITERAL_FLOAT)
45+}
46+
47+// getActualOperandType returns the IR type that will be used when loading this operand
48+// This looks at the original declaration type, not the type-checker promoted type
49+func (ctx *Context) getActualOperandType(node *ast.Node) ir.Type {
50+ switch node.Type {
51+ case ast.Ident:
52+ // For identifiers, use the symbol's original declared type
53+ name := node.Data.(ast.IdentNode).Name
54+ if sym := ctx.findSymbol(name); sym != nil && sym.BxType != nil {
55+ return ir.GetType(sym.BxType, ctx.wordSize)
56+ }
57+ case ast.FuncCall:
58+ // For function calls, use the function's return type
59+ d := node.Data.(ast.FuncCallNode)
60+ if d.FuncExpr.Type == ast.Ident {
61+ funcName := d.FuncExpr.Data.(ast.IdentNode).Name
62+ if sym := ctx.findSymbol(funcName); sym != nil && sym.BxType != nil {
63+ return ir.GetType(sym.BxType, ctx.wordSize)
64+ }
65+ }
66+ }
67+ // Fallback to the promoted type
68+ return ir.GetType(node.Typ, ctx.wordSize)
69 }
70
71 func (ctx *Context) codegenAssign(node *ast.Node) (ir.Value, bool) {
72 d := node.Data.(ast.AssignNode)
73
74- if d.Lhs.Typ != nil && d.Lhs.Typ.Kind == ast.TYPE_STRUCT {
75+ // Resolve named struct types to their actual definitions
76+ lhsType := d.Lhs.Typ
77+ if lhsType != nil && lhsType.Kind != ast.TYPE_STRUCT && lhsType.Name != "" {
78+ if typeSym := ctx.findTypeSymbol(lhsType.Name); typeSym != nil && typeSym.BxType.Kind == ast.TYPE_STRUCT {
79+ lhsType = typeSym.BxType
80+ }
81+ }
82+
83+ if lhsType != nil && lhsType.Kind == ast.TYPE_STRUCT {
84 if d.Op != token.Eq {
85 util.Error(node.Tok, "Compound assignment operators are not supported for structs")
86 return nil, false
87 }
88 lvalAddr := ctx.codegenLvalue(d.Lhs)
89 rvalPtr, _ := ctx.codegenExpr(d.Rhs)
90- size := ctx.getSizeof(d.Lhs.Typ)
91+ size := ctx.getSizeof(lhsType)
92 ctx.addInstr(&ir.Instruction{
93 Op: ir.OpBlit,
94 Args: []ir.Value{rvalPtr, lvalAddr, &ir.Const{Value: size}},
95@@ -114,6 +151,55 @@ func (ctx *Context) codegenAssign(node *ast.Node) (ir.Value, bool) {
96 return rval, false
97 }
98
99+func (ctx *Context) codegenMultiAssign(node *ast.Node) (ir.Value, bool) {
100+ d := node.Data.(ast.MultiAssignNode)
101+
102+ // Only support simple '=' assignment for multi-assignment
103+ if d.Op != token.Eq {
104+ util.Error(node.Tok, "Compound assignment operators are not supported for multi-assignment")
105+ return nil, false
106+ }
107+
108+ // Evaluate all rhs expressions first to avoid dependencies
109+ var rvals []ir.Value
110+ for _, rhs := range d.Rhs {
111+ rval, _ := ctx.codegenExpr(rhs)
112+ rvals = append(rvals, rval)
113+ }
114+
115+ // Then assign to all lhs expressions
116+ for i, lhs := range d.Lhs {
117+ lvalAddr := ctx.codegenLvalue(lhs)
118+ rval := rvals[i]
119+
120+ // Handle type conversions if needed (similar to single assignment)
121+ if lhs.Typ != nil && d.Rhs[i].Typ != nil && lhs.Typ.Kind == ast.TYPE_FLOAT && ctx.isIntegerType(d.Rhs[i].Typ) {
122+ castRval := ctx.newTemp()
123+ var convOp ir.Op
124+ if ctx.getSizeof(d.Rhs[i].Typ) == 8 {
125+ convOp = ir.OpSLToF
126+ } else {
127+ convOp = ir.OpSWToF
128+ }
129+ ctx.addInstr(&ir.Instruction{
130+ Op: convOp,
131+ Typ: ir.GetType(lhs.Typ, ctx.wordSize),
132+ Result: castRval,
133+ Args: []ir.Value{rval},
134+ })
135+ rval = castRval
136+ }
137+
138+ ctx.genStore(lvalAddr, rval, lhs.Typ)
139+ }
140+
141+ // Return the last assigned value
142+ if len(rvals) > 0 {
143+ return rvals[len(rvals)-1], false
144+ }
145+ return nil, false
146+}
147+
148 func (ctx *Context) codegenBinaryOp(node *ast.Node) (ir.Value, bool) {
149 d := node.Data.(ast.BinaryOpNode)
150 if d.Op == token.OrOr || d.Op == token.AndAnd {
151@@ -185,8 +271,51 @@ func (ctx *Context) codegenBinaryOp(node *ast.Node) (ir.Value, bool) {
152 ctx.addInstr(&ir.Instruction{Op: convOp, Typ: floatType, Result: castR, Args: []ir.Value{r}})
153 r = castR
154 }
155- if l_const, ok := l.(*ir.Const); ok { l = &ir.FloatConst{Value: float64(l_const.Value), Typ: floatType} }
156- if r_const, ok := r.(*ir.Const); ok { r = &ir.FloatConst{Value: float64(r_const.Value), Typ: floatType} }
157+ if l_const, ok := l.(*ir.Const); ok {
158+ l = &ir.FloatConst{Value: float64(l_const.Value), Typ: floatType}
159+ }
160+ if r_const, ok := r.(*ir.Const); ok {
161+ r = &ir.FloatConst{Value: float64(r_const.Value), Typ: floatType}
162+ }
163+ }
164+
165+ // Handle integer type conversions - ensure both operands have compatible types for QBE
166+ if ctx.isIntegerType(node.Typ) && !isFloatComparison {
167+ // For QBE compatibility, we need to look at the actual declaration types of the operands
168+ // rather than the promoted types from the type checker
169+ actualLeftType := ctx.getActualOperandType(d.Left)
170+ actualRightType := ctx.getActualOperandType(d.Right)
171+
172+ // Use actual types for conversion logic
173+ if actualLeftType != resultIrType {
174+ castL := ctx.newTemp()
175+ var convOp ir.Op = ir.OpCast
176+ if actualLeftType < resultIrType {
177+ // Extending to larger size
178+ switch actualLeftType {
179+ case ir.TypeB: convOp = ir.OpExtUB
180+ case ir.TypeH: convOp = ir.OpExtUH
181+ case ir.TypeW: convOp = ir.OpExtSW
182+ }
183+ }
184+ ctx.addInstr(&ir.Instruction{Op: convOp, Typ: resultIrType, OperandType: actualLeftType, Result: castL, Args: []ir.Value{l}})
185+ l = castL
186+ }
187+
188+ if actualRightType != resultIrType {
189+ castR := ctx.newTemp()
190+ var convOp ir.Op = ir.OpCast
191+ if actualRightType < resultIrType {
192+ // Extending to larger size
193+ switch actualRightType {
194+ case ir.TypeB: convOp = ir.OpExtUB
195+ case ir.TypeH: convOp = ir.OpExtUH
196+ case ir.TypeW: convOp = ir.OpExtSW
197+ }
198+ }
199+ ctx.addInstr(&ir.Instruction{Op: convOp, Typ: resultIrType, OperandType: actualRightType, Result: castR, Args: []ir.Value{r}})
200+ r = castR
201+ }
202 }
203
204 var operandIrType ir.Type
205@@ -244,7 +373,9 @@ func (ctx *Context) codegenUnaryOp(node *ast.Node) (ir.Value, bool) {
206 }
207 currentVal := ctx.genLoad(lvalAddr, d.Expr.Typ)
208 oneConst := ir.Value(&ir.Const{Value: 1})
209- if isFloat { oneConst = &ir.FloatConst{Value: 1.0, Typ: valType} }
210+ if isFloat {
211+ oneConst = &ir.FloatConst{Value: 1.0, Typ: valType}
212+ }
213 ctx.addInstr(&ir.Instruction{Op: op, Typ: valType, Result: res, Args: []ir.Value{currentVal, oneConst}})
214 ctx.genStore(lvalAddr, res, d.Expr.Typ)
215 default:
216@@ -263,10 +394,14 @@ func (ctx *Context) codegenPostfixOp(node *ast.Node) (ir.Value, bool) {
217 isFloat := ctx.isFloatType(d.Expr.Typ)
218
219 op := map[token.Type]ir.Op{token.Inc: ir.OpAdd, token.Dec: ir.OpSub}[d.Op]
220- if isFloat { op = map[token.Type]ir.Op{token.Inc: ir.OpAddF, token.Dec: ir.OpSubF}[d.Op] }
221+ if isFloat {
222+ op = map[token.Type]ir.Op{token.Inc: ir.OpAddF, token.Dec: ir.OpSubF}[d.Op]
223+ }
224
225 oneConst := ir.Value(&ir.Const{Value: 1})
226- if isFloat { oneConst = &ir.FloatConst{Value: 1.0, Typ: valType} }
227+ if isFloat {
228+ oneConst = &ir.FloatConst{Value: 1.0, Typ: valType}
229+ }
230
231 ctx.addInstr(&ir.Instruction{Op: op, Typ: valType, Result: newVal, Args: []ir.Value{res, oneConst}})
232 ctx.genStore(lvalAddr, newVal, d.Expr.Typ)
233@@ -277,7 +412,17 @@ func (ctx *Context) codegenIndirection(node *ast.Node) (ir.Value, bool) {
234 exprNode := node.Data.(ast.IndirectionNode).Expr
235 addr, _ := ctx.codegenExpr(exprNode)
236
237- if node.Typ != nil && node.Typ.Kind == ast.TYPE_STRUCT { return addr, false }
238+ // Resolve named struct types to their actual definitions
239+ nodeType := node.Typ
240+ if nodeType != nil && nodeType.Kind != ast.TYPE_STRUCT && nodeType.Name != "" {
241+ if typeSym := ctx.findTypeSymbol(nodeType.Name); typeSym != nil && typeSym.BxType.Kind == ast.TYPE_STRUCT {
242+ nodeType = typeSym.BxType
243+ }
244+ }
245+
246+ if nodeType != nil && nodeType.Kind == ast.TYPE_STRUCT {
247+ return addr, false
248+ }
249
250 loadType := node.Typ
251 if !ctx.isTypedPass && exprNode.Type == ast.Ident {
252@@ -296,7 +441,9 @@ func (ctx *Context) codegenSubscriptAddr(node *ast.Node) ir.Value {
253 var scale int64 = int64(ctx.wordSize)
254 if d.Array.Typ != nil {
255 if d.Array.Typ.Kind == ast.TYPE_POINTER || d.Array.Typ.Kind == ast.TYPE_ARRAY {
256- if d.Array.Typ.Base != nil { scale = ctx.getSizeof(d.Array.Typ.Base) }
257+ if d.Array.Typ.Base != nil {
258+ scale = ctx.getSizeof(d.Array.Typ.Base)
259+ }
260 }
261 } else if !ctx.isTypedPass && d.Array.Type == ast.Ident {
262 if sym := ctx.findSymbol(d.Array.Data.(ast.IdentNode).Name); sym != nil && sym.IsByteArray {
263@@ -331,7 +478,9 @@ func (ctx *Context) codegenAddressOf(node *ast.Node) (ir.Value, bool) {
264 name := lvalNode.Data.(ast.IdentNode).Name
265 if sym := ctx.findSymbol(name); sym != nil {
266 isTypedArray := sym.BxType != nil && sym.BxType.Kind == ast.TYPE_ARRAY
267- if sym.Type == symFunc || isTypedArray { return sym.IRVal, false }
268+ if sym.Type == symFunc || isTypedArray {
269+ return sym.IRVal, false
270+ }
271 if sym.IsVector {
272 res, _ := ctx.codegenExpr(lvalNode)
273 return res, false
274@@ -352,14 +501,29 @@ func (ctx *Context) codegenFuncCall(node *ast.Node) (ir.Value, bool) {
275
276 funcVal, _ := ctx.codegenExpr(d.FuncExpr)
277
278+ // Get function signature for type checking
279+ var expectedParamTypes []*ast.BxType
280 isVariadic := false
281+
282 if d.FuncExpr.Type == ast.Ident {
283 name := d.FuncExpr.Data.(ast.IdentNode).Name
284 if sym := ctx.findSymbol(name); sym != nil {
285 if sym.Node != nil {
286- if fd, ok := sym.Node.Data.(ast.FuncDeclNode); ok { isVariadic = fd.HasVarargs }
287+ if fd, ok := sym.Node.Data.(ast.FuncDeclNode); ok {
288+ isVariadic = fd.HasVarargs
289+ // Extract parameter types
290+ for _, param := range fd.Params {
291+ // Handle both typed parameters (VarDeclNode) and untyped parameters (IdentNode)
292+ if paramData, ok := param.Data.(ast.VarDeclNode); ok {
293+ expectedParamTypes = append(expectedParamTypes, paramData.Type)
294+ }
295+ // For IdentNode (untyped parameters), we can't extract type info, so skip
296+ }
297+ }
298+ }
299+ if !isVariadic && sym.Type == symExtrn {
300+ isVariadic = true
301 }
302- if !isVariadic && sym.Type == symExtrn { isVariadic = true }
303 }
304 }
305
306@@ -367,7 +531,46 @@ func (ctx *Context) codegenFuncCall(node *ast.Node) (ir.Value, bool) {
307 argTypes := make([]ir.Type, len(d.Args))
308 for i := len(d.Args) - 1; i >= 0; i-- {
309 argVals[i], _ = ctx.codegenExpr(d.Args[i])
310- argTypes[i] = ir.GetType(d.Args[i].Typ, ctx.wordSize)
311+
312+ // For typed functions with known parameter types, use the expected type
313+ var expectedArgType *ast.BxType
314+ if i < len(expectedParamTypes) {
315+ expectedArgType = expectedParamTypes[i]
316+ }
317+
318+ // If we have an expected type and the argument is a literal that can be coerced
319+ if expectedArgType != nil && d.Args[i].Typ != nil {
320+ argType := d.Args[i].Typ
321+
322+ // Handle float literal coercion to specific float types
323+ if argType.Kind == ast.TYPE_LITERAL_FLOAT && expectedArgType.Kind == ast.TYPE_FLOAT {
324+ // Debug warning for type coercion
325+ if ctx.cfg.IsWarningEnabled(config.WarnDebugComp) {
326+ util.Warn(ctx.cfg, config.WarnDebugComp, d.Args[i].Tok,
327+ "Coercing float literal to %s for parameter %d", expectedArgType.Name, i+1)
328+ }
329+
330+ expectedIrType := ir.GetType(expectedArgType, ctx.wordSize)
331+ currentIrType := ir.GetType(argType, ctx.wordSize)
332+
333+ // Convert if types don't match
334+ if currentIrType != expectedIrType {
335+ convertedVal := ctx.newTemp()
336+ ctx.addInstr(&ir.Instruction{
337+ Op: ir.OpFToF,
338+ Typ: expectedIrType,
339+ Result: convertedVal,
340+ Args: []ir.Value{argVals[i]},
341+ })
342+ argVals[i] = convertedVal
343+ }
344+ argTypes[i] = expectedIrType
345+ } else {
346+ argTypes[i] = ir.GetType(d.Args[i].Typ, ctx.wordSize)
347+ }
348+ } else {
349+ argTypes[i] = ir.GetType(d.Args[i].Typ, ctx.wordSize)
350+ }
351
352 if isVariadic && argTypes[i] == ir.TypeS {
353 promotedVal := ctx.newTemp()
354@@ -387,7 +590,9 @@ func (ctx *Context) codegenFuncCall(node *ast.Node) (ir.Value, bool) {
355 returnType := ir.GetType(node.Typ, ctx.wordSize)
356 callArgs := append([]ir.Value{funcVal}, argVals...)
357
358- if !isStmt && returnType != ir.TypeNone { res = ctx.newTemp() }
359+ if !isStmt && returnType != ir.TypeNone {
360+ res = ctx.newTemp()
361+ }
362
363 ctx.addInstr(&ir.Instruction{
364 Op: ir.OpCall,
365@@ -407,7 +612,9 @@ func (ctx *Context) codegenTypeCast(node *ast.Node) (ir.Value, bool) {
366 sourceType := d.Expr.Typ
367 targetType := d.TargetType
368
369- if ir.GetType(sourceType, ctx.wordSize) == ir.GetType(targetType, ctx.wordSize) { return val, false }
370+ if ir.GetType(sourceType, ctx.wordSize) == ir.GetType(targetType, ctx.wordSize) {
371+ return val, false
372+ }
373
374 res := ctx.newTemp()
375 targetIrType := ir.GetType(targetType, ctx.wordSize)
376@@ -420,7 +627,9 @@ func (ctx *Context) codegenTypeCast(node *ast.Node) (ir.Value, bool) {
377 op := ir.OpCast
378 if sourceIsInt && targetIsFloat {
379 op = ir.OpSWToF
380- if ctx.getSizeof(sourceType) == 8 { op = ir.OpSLToF }
381+ if ctx.getSizeof(sourceType) == 8 {
382+ op = ir.OpSLToF
383+ }
384 } else if sourceIsFloat && targetIsFloat {
385 op = ir.OpFToF
386 } else if sourceIsFloat && targetIsInt {
387@@ -429,9 +638,12 @@ func (ctx *Context) codegenTypeCast(node *ast.Node) (ir.Value, bool) {
388 sourceSize, targetSize := ctx.getSizeof(sourceType), ctx.getSizeof(targetType)
389 if targetSize > sourceSize {
390 switch sourceSize {
391- case 1: op = ir.OpExtSB
392- case 2: op = ir.OpExtSH
393- case 4: op = ir.OpExtSW
394+ case 1:
395+ op = ir.OpExtSB
396+ case 2:
397+ op = ir.OpExtSH
398+ case 4:
399+ op = ir.OpExtSW
400 }
401 }
402 }
403@@ -484,8 +696,12 @@ func (ctx *Context) codegenTernary(node *ast.Node) (ir.Value, bool) {
404 if !terminates {
405 ctx.startBlock(endL)
406 phiArgs := []ir.Value{}
407- if !thenTerminates { phiArgs = append(phiArgs, thenPred, thenVal) }
408- if !elseTerminates { phiArgs = append(phiArgs, elsePred, elseVal) }
409+ if !thenTerminates {
410+ phiArgs = append(phiArgs, thenPred, thenVal)
411+ }
412+ if !elseTerminates {
413+ phiArgs = append(phiArgs, elsePred, elseVal)
414+ }
415 ctx.addInstr(&ir.Instruction{Op: ir.OpPhi, Typ: resType, Result: res, Args: phiArgs})
416 }
417 return res, terminates
418@@ -587,6 +803,42 @@ func (ctx *Context) codegenStructLiteral(node *ast.Node) (ir.Value, bool) {
419 return structPtr, false
420 }
421
422+func (ctx *Context) codegenArrayLiteral(node *ast.Node) (ir.Value, bool) {
423+ d := node.Data.(ast.ArrayLiteralNode)
424+
425+ // For array literals, we need the element type and count from the literal itself
426+ elemType := d.ElementType
427+ elemSize := ctx.getSizeof(elemType)
428+ elemAlign := ctx.getAlignof(elemType)
429+ arraySize := int64(len(d.Values)) * elemSize
430+
431+ // Allocate memory for the array
432+ arrayPtr := ctx.newTemp()
433+ ctx.addInstr(&ir.Instruction{
434+ Op: ir.OpAlloc,
435+ Typ: ir.GetType(nil, ctx.wordSize),
436+ Result: arrayPtr,
437+ Args: []ir.Value{&ir.Const{Value: arraySize}},
438+ Align: int(elemAlign),
439+ })
440+
441+ // Initialize each element
442+ for i, valNode := range d.Values {
443+ elemOffset := int64(i) * elemSize
444+ elemAddr := ctx.newTemp()
445+ ctx.addInstr(&ir.Instruction{
446+ Op: ir.OpAdd,
447+ Typ: ir.GetType(nil, ctx.wordSize),
448+ Result: elemAddr,
449+ Args: []ir.Value{arrayPtr, &ir.Const{Value: elemOffset}},
450+ })
451+ val, _ := ctx.codegenExpr(valNode)
452+ ctx.genStore(elemAddr, val, elemType)
453+ }
454+
455+ return arrayPtr, false
456+}
457+
458 func (ctx *Context) codegenReturn(node *ast.Node) bool {
459 d := node.Data.(ast.ReturnNode)
460 var retVal ir.Value
461@@ -604,22 +856,30 @@ func (ctx *Context) codegenIf(node *ast.Node) bool {
462 d := node.Data.(ast.IfNode)
463 thenL, endL := ctx.newLabel(), ctx.newLabel()
464 elseL := endL
465- if d.ElseBody != nil { elseL = ctx.newLabel() }
466+ if d.ElseBody != nil {
467+ elseL = ctx.newLabel()
468+ }
469
470 ctx.codegenLogicalCond(d.Cond, thenL, elseL)
471
472 ctx.startBlock(thenL)
473 thenTerminates := ctx.codegenStmt(d.ThenBody)
474- if !thenTerminates { ctx.addInstr(&ir.Instruction{Op: ir.OpJmp, Args: []ir.Value{endL}}) }
475+ if !thenTerminates {
476+ ctx.addInstr(&ir.Instruction{Op: ir.OpJmp, Args: []ir.Value{endL}})
477+ }
478
479 var elseTerminates bool
480 if d.ElseBody != nil {
481 ctx.startBlock(elseL)
482 elseTerminates = ctx.codegenStmt(d.ElseBody)
483- if !elseTerminates { ctx.addInstr(&ir.Instruction{Op: ir.OpJmp, Args: []ir.Value{endL}}) }
484+ if !elseTerminates {
485+ ctx.addInstr(&ir.Instruction{Op: ir.OpJmp, Args: []ir.Value{endL}})
486+ }
487 }
488
489- if !thenTerminates || !elseTerminates { ctx.startBlock(endL) }
490+ if !thenTerminates || !elseTerminates {
491+ ctx.startBlock(endL)
492+ }
493 return thenTerminates && (d.ElseBody != nil && elseTerminates)
494 }
495
496@@ -637,48 +897,99 @@ func (ctx *Context) codegenWhile(node *ast.Node) bool {
497
498 ctx.startBlock(bodyL)
499 bodyTerminates := ctx.codegenStmt(d.Body)
500- if !bodyTerminates { ctx.addInstr(&ir.Instruction{Op: ir.OpJmp, Args: []ir.Value{startL}}) }
501+ if !bodyTerminates {
502+ ctx.addInstr(&ir.Instruction{Op: ir.OpJmp, Args: []ir.Value{startL}})
503+ }
504
505 ctx.startBlock(endL)
506 return false
507 }
508
509 func getBinaryOpAndType(op token.Type, resultAstType *ast.BxType, wordSize int) (ir.Op, ir.Type) {
510- if resultAstType != nil && (resultAstType.Kind == ast.TYPE_FLOAT || resultAstType.Kind == ast.TYPE_UNTYPED_FLOAT) {
511+ if resultAstType != nil && (resultAstType.Kind == ast.TYPE_FLOAT || resultAstType.Kind == ast.TYPE_LITERAL_FLOAT) {
512 typ := ir.GetType(resultAstType, wordSize)
513 switch op {
514- case token.Plus, token.PlusEq, token.EqPlus: return ir.OpAddF, typ
515- case token.Minus, token.MinusEq, token.EqMinus: return ir.OpSubF, typ
516- case token.Star, token.StarEq, token.EqStar: return ir.OpMulF, typ
517- case token.Slash, token.SlashEq, token.EqSlash: return ir.OpDivF, typ
518- case token.Rem, token.RemEq, token.EqRem: return ir.OpRemF, typ
519- case token.EqEq: return ir.OpCEq, typ
520- case token.Neq: return ir.OpCNeq, typ
521- case token.Lt: return ir.OpCLt, typ
522- case token.Gt: return ir.OpCGt, typ
523- case token.Lte: return ir.OpCLe, typ
524- case token.Gte: return ir.OpCGe, typ
525+ case token.Plus, token.PlusEq, token.EqPlus:
526+ return ir.OpAddF, typ
527+ case token.Minus, token.MinusEq, token.EqMinus:
528+ return ir.OpSubF, typ
529+ case token.Star, token.StarEq, token.EqStar:
530+ return ir.OpMulF, typ
531+ case token.Slash, token.SlashEq, token.EqSlash:
532+ return ir.OpDivF, typ
533+ case token.Rem, token.RemEq, token.EqRem:
534+ return ir.OpRemF, typ
535+ case token.EqEq:
536+ return ir.OpCEq, typ
537+ case token.Neq:
538+ return ir.OpCNeq, typ
539+ case token.Lt:
540+ return ir.OpCLt, typ
541+ case token.Gt:
542+ return ir.OpCGt, typ
543+ case token.Lte:
544+ return ir.OpCLe, typ
545+ case token.Gte:
546+ return ir.OpCGe, typ
547 }
548 }
549
550 typ := ir.GetType(resultAstType, wordSize)
551 switch op {
552- case token.Plus, token.PlusEq, token.EqPlus: return ir.OpAdd, typ
553- case token.Minus, token.MinusEq, token.EqMinus: return ir.OpSub, typ
554- case token.Star, token.StarEq, token.EqStar: return ir.OpMul, typ
555- case token.Slash, token.SlashEq, token.EqSlash: return ir.OpDiv, typ
556- case token.Rem, token.RemEq, token.EqRem: return ir.OpRem, typ
557- case token.And, token.AndEq, token.EqAnd: return ir.OpAnd, typ
558- case token.Or, token.OrEq, token.EqOr: return ir.OpOr, typ
559- case token.Xor, token.XorEq, token.EqXor: return ir.OpXor, typ
560- case token.Shl, token.ShlEq, token.EqShl: return ir.OpShl, typ
561- case token.Shr, token.ShrEq, token.EqShr: return ir.OpShr, typ
562- case token.EqEq: return ir.OpCEq, typ
563- case token.Neq: return ir.OpCNeq, typ
564- case token.Lt: return ir.OpCLt, typ
565- case token.Gt: return ir.OpCGt, typ
566- case token.Lte: return ir.OpCLe, typ
567- case token.Gte: return ir.OpCGe, typ
568+ case token.Plus, token.PlusEq, token.EqPlus:
569+ return ir.OpAdd, typ
570+ case token.Minus, token.MinusEq, token.EqMinus:
571+ return ir.OpSub, typ
572+ case token.Star, token.StarEq, token.EqStar:
573+ return ir.OpMul, typ
574+ case token.Slash, token.SlashEq, token.EqSlash:
575+ return ir.OpDiv, typ
576+ case token.Rem, token.RemEq, token.EqRem:
577+ return ir.OpRem, typ
578+ case token.And, token.AndEq, token.EqAnd:
579+ return ir.OpAnd, typ
580+ case token.Or, token.OrEq, token.EqOr:
581+ return ir.OpOr, typ
582+ case token.Xor, token.XorEq, token.EqXor:
583+ return ir.OpXor, typ
584+ case token.Shl, token.ShlEq, token.EqShl:
585+ return ir.OpShl, typ
586+ case token.Shr, token.ShrEq, token.EqShr:
587+ return ir.OpShr, typ
588+ case token.EqEq:
589+ return ir.OpCEq, typ
590+ case token.Neq:
591+ return ir.OpCNeq, typ
592+ case token.Lt:
593+ return ir.OpCLt, typ
594+ case token.Gt:
595+ return ir.OpCGt, typ
596+ case token.Lte:
597+ return ir.OpCLe, typ
598+ case token.Gte:
599+ return ir.OpCGe, typ
600 }
601 return -1, -1
602 }
603+
604+// codegenTypeOf generates code for typeof(expr) which returns a string representation of the type
605+func (ctx *Context) codegenTypeOf(node *ast.Node) (ir.Value, bool) {
606+ d := node.Data.(ast.TypeOfNode)
607+
608+ // Type check the expression to determine its type
609+ _, _ = ctx.codegenExpr(d.Expr)
610+
611+ // Get the type of the expression
612+ var exprType *ast.BxType
613+ if d.Expr.Typ != nil {
614+ exprType = d.Expr.Typ
615+ } else {
616+ exprType = ast.TypeUntyped
617+ }
618+
619+ // Convert the type to its string representation
620+ typeStr := ast.TypeToString(exprType)
621+
622+ // Add the string to the string table and return a reference to it
623+ return ctx.addString(typeStr), false
624+}
+413,
-142
1@@ -15,37 +15,51 @@ import (
2 )
3
4 type llvmBackend struct {
5- out *strings.Builder
6- prog *ir.Program
7- cfg *config.Config
8- wordType string
9- tempTypes map[string]string
10- funcSigs map[string]string
11- currentFn *ir.Func
12+ out *strings.Builder
13+ prog *ir.Program
14+ cfg *config.Config
15+ wordType string
16+ tempTypes map[string]string // maps temp name to LLVM type
17+ tempIRTypes map[string]ir.Type // maps temp name to IR type
18+ funcSigs map[string]string
19+ currentFn *ir.Func
20 }
21
22 func NewLLVMBackend() Backend { return &llvmBackend{} }
23
24 func (b *llvmBackend) Generate(prog *ir.Program, cfg *config.Config) (*bytes.Buffer, error) {
25+ llvmIR, err := b.GenerateIR(prog, cfg)
26+ if err != nil {
27+ return nil, err
28+ }
29+
30+ asm, err := b.compileLLVMIR(llvmIR)
31+ if err != nil {
32+ return nil, err
33+ }
34+ return bytes.NewBufferString(asm), nil
35+}
36+
37+func (b *llvmBackend) GenerateIR(prog *ir.Program, cfg *config.Config) (string, error) {
38 var llvmIRBuilder strings.Builder
39 b.out = &llvmIRBuilder
40 b.prog = prog
41 b.cfg = cfg
42 b.wordType = fmt.Sprintf("i%d", cfg.WordSize*8)
43 b.tempTypes = make(map[string]string)
44+ b.tempIRTypes = make(map[string]ir.Type)
45 b.funcSigs = make(map[string]string)
46
47 b.gen()
48
49- llvmIR := llvmIRBuilder.String()
50- asm, err := b.compileLLVMIR(llvmIR)
51- if err != nil { return nil, err }
52- return bytes.NewBufferString(asm), nil
53+ return llvmIRBuilder.String(), nil
54 }
55
56 func (b *llvmBackend) compileLLVMIR(llvmIR string) (string, error) {
57 llFile, err := os.CreateTemp("", "gbc-main-*.ll")
58- if err != nil { return "", fmt.Errorf("failed to create temp file for LLVM IR: %w", err) }
59+ if err != nil {
60+ return "", fmt.Errorf("failed to create temp file for LLVM IR: %w", err)
61+ }
62 defer os.Remove(llFile.Name())
63 if _, err := llFile.WriteString(llvmIR); err != nil {
64 return "", fmt.Errorf("failed to write to temp file for LLVM IR: %w", err)
65@@ -53,7 +67,9 @@ func (b *llvmBackend) compileLLVMIR(llvmIR string) (string, error) {
66 llFile.Close()
67
68 asmFile, err := os.CreateTemp("", "gbc-main-*.s")
69- if err != nil { return "", fmt.Errorf("failed to create temp file for assembly: %w", err) }
70+ if err != nil {
71+ return "", fmt.Errorf("failed to create temp file for assembly: %w", err)
72+ }
73 asmFile.Close()
74 defer os.Remove(asmFile.Name())
75
76@@ -63,7 +79,9 @@ func (b *llvmBackend) compileLLVMIR(llvmIR string) (string, error) {
77 }
78
79 asmBytes, err := os.ReadFile(asmFile.Name())
80- if err != nil { return "", fmt.Errorf("failed to read temporary assembly file: %w", err) }
81+ if err != nil {
82+ return "", fmt.Errorf("failed to read temporary assembly file: %w", err)
83+ }
84 return string(asmBytes), nil
85 }
86
87@@ -90,7 +108,9 @@ func (b *llvmBackend) genDeclarations() {
88 if len(b.prog.ExtrnVars) > 0 {
89 b.out.WriteString("; --- External Variables ---\n")
90 for name := range b.prog.ExtrnVars {
91- if knownExternals[name] { continue }
92+ if knownExternals[name] {
93+ continue
94+ }
95 ptrType := "i8*"
96 fmt.Fprintf(b.out, "@%s = external global %s\n", name, ptrType)
97 b.tempTypes["@"+name] = ptrType + "*"
98@@ -100,9 +120,12 @@ func (b *llvmBackend) genDeclarations() {
99 }
100
101 potentialFuncs := make(map[string]bool)
102+
103 for _, name := range b.prog.ExtrnFuncs {
104 potentialFuncs[name] = true
105 }
106+
107+ // Find additional functions that are called but not explicitly declared
108 for _, fn := range b.prog.Funcs {
109 for _, block := range fn.Blocks {
110 for _, instr := range block.Instructions {
111@@ -115,6 +138,7 @@ func (b *llvmBackend) genDeclarations() {
112 }
113 }
114
115+ // Remove functions that are defined in this program (not external)
116 for _, fn := range b.prog.Funcs {
117 delete(potentialFuncs, fn.Name)
118 }
119@@ -131,7 +155,11 @@ func (b *llvmBackend) genDeclarations() {
120 sort.Strings(funcsToDeclare)
121 for _, name := range funcsToDeclare {
122 retType := b.getFuncSig(name)
123+
124+ // All external functions are declared as varargs
125+ // The linker will handle the correct resolution
126 sig := fmt.Sprintf("declare %s @%s(...)\n", retType, name)
127+
128 b.out.WriteString(sig)
129 b.funcSigs[name] = sig
130 }
131@@ -140,20 +168,40 @@ func (b *llvmBackend) genDeclarations() {
132 }
133
134 func (b *llvmBackend) genStrings() {
135- if len(b.prog.Strings) == 0 { return }
136+ if len(b.prog.Strings) == 0 {
137+ return
138+ }
139 b.out.WriteString("; --- String Literals ---\n")
140 for s, label := range b.prog.Strings {
141 strLen := len(s) + 1
142- escaped := b.escapeString(s)
143 typeStr := fmt.Sprintf("[%d x i8]", strLen)
144- fmt.Fprintf(b.out, "@%s = private unnamed_addr constant %s c\"%s\\00\"\n", label, typeStr, escaped)
145+
146+ // Always emit as raw byte sequence for simplicity and reliability
147+ b.out.WriteString(fmt.Sprintf("@%s = private unnamed_addr constant %s [", label, typeStr))
148+
149+ // Handle empty strings
150+ if len(s) == 0 {
151+ b.out.WriteString("i8 0")
152+ } else {
153+ for i := 0; i < len(s); i++ {
154+ if i > 0 {
155+ b.out.WriteString(", ")
156+ }
157+ b.out.WriteString(fmt.Sprintf("i8 %d", s[i]))
158+ }
159+ b.out.WriteString(", i8 0")
160+ }
161+ b.out.WriteString("]\n")
162+
163 b.tempTypes["@"+label] = typeStr + "*"
164 }
165 b.out.WriteString("\n")
166 }
167
168 func (b *llvmBackend) genGlobals() {
169- if len(b.prog.Globals) == 0 { return }
170+ if len(b.prog.Globals) == 0 {
171+ return
172+ }
173 b.out.WriteString("; --- Global Variables ---\n")
174 for _, g := range b.prog.Globals {
175 hasInitializer := false
176@@ -167,12 +215,16 @@ func (b *llvmBackend) genGlobals() {
177 totalItemCount++
178 hasInitializer = true
179 }
180- if firstItemType == -1 { firstItemType = item.Typ }
181+ if firstItemType == -1 {
182+ firstItemType = item.Typ
183+ }
184 }
185
186 var globalType string
187 elemType := b.formatType(firstItemType)
188- if firstItemType == -1 { elemType = b.wordType }
189+ if firstItemType == -1 {
190+ elemType = b.wordType
191+ }
192
193 if totalItemCount > 1 {
194 globalType = fmt.Sprintf("[%d x %s]", totalItemCount, elemType)
195@@ -214,18 +266,26 @@ func (b *llvmBackend) formatGlobalInitializerValue(v ir.Value, targetType string
196 case *ir.Const:
197 return fmt.Sprintf("%d", val.Value)
198 case *ir.FloatConst:
199- if targetType == "float" { return fmt.Sprintf("0x%X", math.Float32bits(float32(val.Value))) }
200- return fmt.Sprintf("0x%X", math.Float64bits(val.Value))
201+ if targetType == "float" {
202+ // For 32-bit floats, truncate to float32 precision then expand back to float64 for hex format
203+ float32Val := float32(val.Value)
204+ return fmt.Sprintf("0x%016X", math.Float64bits(float64(float32Val)))
205+ }
206+ return fmt.Sprintf("0x%016X", math.Float64bits(val.Value))
207 case *ir.Global:
208 strContent, isString := b.prog.IsStringLabel(val.Name)
209 if isString {
210 strType := fmt.Sprintf("[%d x i8]", len(strContent)+1)
211 gep := fmt.Sprintf("getelementptr inbounds (%s, %s* @%s, i64 0, i64 0)", strType, strType, val.Name)
212- if targetType != "i8*" { return fmt.Sprintf("ptrtoint (i8* %s to %s)", gep, targetType) }
213+ if targetType != "i8*" {
214+ return fmt.Sprintf("ptrtoint (i8* %s to %s)", gep, targetType)
215+ }
216 return gep
217 }
218 sourceType := b.getType(val)
219- if !strings.HasSuffix(sourceType, "*") { sourceType += "*" }
220+ if !strings.HasSuffix(sourceType, "*") {
221+ sourceType += "*"
222+ }
223 return fmt.Sprintf("bitcast (%s @%s to %s)", sourceType, val.Name, targetType)
224 default:
225 return "0"
226@@ -236,7 +296,9 @@ func (b *llvmBackend) genFunc(fn *ir.Func) {
227 b.currentFn = fn
228 globalTypes := make(map[string]string)
229 for k, v := range b.tempTypes {
230- if strings.HasPrefix(k, "@") { globalTypes[k] = v }
231+ if strings.HasPrefix(k, "@") {
232+ globalTypes[k] = v
233+ }
234 }
235 b.tempTypes = globalTypes
236
237@@ -245,20 +307,26 @@ func (b *llvmBackend) genFunc(fn *ir.Func) {
238 for _, p := range fn.Params {
239 pName := b.formatValue(p.Val)
240 pType := b.formatType(p.Typ)
241- if fn.Name == "main" && p.Name == "argv" { pType = "i8**" }
242+ if fn.Name == "main" && p.Name == "argv" {
243+ pType = "i8**"
244+ }
245 params = append(params, fmt.Sprintf("%s %s", pType, pName))
246 b.tempTypes[pName] = pType
247 }
248 paramStr := strings.Join(params, ", ")
249 if fn.HasVarargs {
250- if len(params) > 0 { paramStr += ", " }
251+ if len(params) > 0 {
252+ paramStr += ", "
253+ }
254 paramStr += "..."
255 }
256
257 fmt.Fprintf(b.out, "define %s @%s(%s) {\n", retTypeStr, fn.Name, paramStr)
258 for i, block := range fn.Blocks {
259 labelName := block.Label.Name
260- if i == 0 { labelName = "entry" }
261+ if i == 0 {
262+ labelName = "entry"
263+ }
264 fmt.Fprintf(b.out, "%s:\n", labelName)
265 b.genBlock(block)
266 }
267@@ -282,7 +350,9 @@ func (b *llvmBackend) genBlock(block *ir.BasicBlock) {
268 for _, instr := range block.Instructions[:phiEndIndex] {
269 if instr.Op == ir.OpPhi {
270 cast := b.genPhi(instr)
271- if cast != "" { deferredCasts = append(deferredCasts, cast) }
272+ if cast != "" {
273+ deferredCasts = append(deferredCasts, cast)
274+ }
275 }
276 }
277
278@@ -296,17 +366,23 @@ func (b *llvmBackend) genBlock(block *ir.BasicBlock) {
279 }
280
281 func (b *llvmBackend) genInstr(instr *ir.Instruction) {
282- if instr.Op == ir.OpPhi { return }
283+ if instr.Op == ir.OpPhi {
284+ return
285+ }
286
287 resultName := ""
288- if instr.Result != nil { resultName = b.formatValue(instr.Result) }
289+ if instr.Result != nil {
290+ resultName = b.formatValue(instr.Result)
291+ }
292
293 b.out.WriteString("\t")
294
295 switch instr.Op {
296 case ir.OpAlloc:
297 align := instr.Align
298- if align == 0 { align = b.cfg.StackAlignment }
299+ if align == 0 {
300+ align = b.cfg.StackAlignment
301+ }
302 sizeVal := b.prepareArg(instr.Args[0], b.wordType)
303 fmt.Fprintf(b.out, "%s = alloca i8, %s %s, align %d\n", resultName, b.wordType, sizeVal, align)
304 b.tempTypes[resultName] = "i8*"
305@@ -315,8 +391,34 @@ func (b *llvmBackend) genInstr(instr *ir.Instruction) {
306 valType := b.formatType(instr.Typ)
307 ptrType := valType + "*"
308 ptrVal := b.prepareArg(instr.Args[0], ptrType)
309- fmt.Fprintf(b.out, "%s = load %s, %s %s, align %d\n", resultName, valType, ptrType, ptrVal, ir.SizeOfType(instr.Typ, b.cfg.WordSize))
310- b.tempTypes[resultName] = valType
311+
312+ // Check if we need to promote smaller signed types to word size (like QBE does)
313+ needsPromotion := instr.Typ == ir.TypeSB || instr.Typ == ir.TypeSH || instr.Typ == ir.TypeUB || instr.Typ == ir.TypeUH || instr.Typ == ir.TypeB || instr.Typ == ir.TypeH || instr.Typ == ir.TypeW
314+
315+ if needsPromotion {
316+ // Load the smaller type first
317+ tempName := fmt.Sprintf("%s_small", resultName)
318+ fmt.Fprintf(b.out, "%s = load %s, %s %s, align %d\n", tempName, valType, ptrType, ptrVal, ir.SizeOfType(instr.Typ, b.cfg.WordSize))
319+
320+ // Then extend to word size
321+ wordType := b.wordType
322+ var extOp string
323+ switch instr.Typ {
324+ case ir.TypeSB: extOp = "sext" // signed byte - sign extend
325+ case ir.TypeSH: extOp = "sext" // signed half - sign extend
326+ case ir.TypeUB, ir.TypeB: extOp = "zext" // unsigned byte or ambiguous byte - zero extend
327+ case ir.TypeUH, ir.TypeH: extOp = "zext" // unsigned half or ambiguous half - zero extend
328+ case ir.TypeW: extOp = "sext" // 32-bit word - sign extend (assuming signed by default)
329+ }
330+
331+ fmt.Fprintf(b.out, "%s = %s %s %s to %s\n", resultName, extOp, valType, tempName, wordType)
332+ b.tempTypes[resultName] = wordType
333+ b.tempIRTypes[resultName] = ir.GetType(nil, b.cfg.WordSize)
334+ } else {
335+ fmt.Fprintf(b.out, "%s = load %s, %s %s, align %d\n", resultName, valType, ptrType, ptrVal, ir.SizeOfType(instr.Typ, b.cfg.WordSize))
336+ b.tempTypes[resultName] = valType
337+ b.tempIRTypes[resultName] = instr.Typ
338+ }
339
340 case ir.OpStore:
341 valType := b.formatType(instr.Typ)
342@@ -332,11 +434,25 @@ func (b *llvmBackend) genInstr(instr *ir.Instruction) {
343 targetType := b.formatType(instr.Typ)
344 sourceValStr := b.prepareArg(instr.Args[0], targetType)
345 if sourceValStr != resultName {
346- sourceType := b.getType(instr.Args[0])
347- if strings.HasSuffix(targetType, "*") {
348- fmt.Fprintf(b.out, "%s = bitcast %s %s to %s\n", resultName, sourceType, sourceValStr, targetType)
349+ // Check if prepareArg already did the conversion
350+ originalValStr := b.formatValue(instr.Args[0])
351+ if sourceValStr != originalValStr {
352+ // prepareArg already handled the conversion, just assign the result
353+ if strings.HasSuffix(targetType, "*") {
354+ // For pointer types, use getelementptr with 0 offset (effectively a copy)
355+ fmt.Fprintf(b.out, "%s = getelementptr i8, %s %s, i64 0\n", resultName, targetType, sourceValStr)
356+ } else {
357+ // For integer types, use add with 0
358+ fmt.Fprintf(b.out, "%s = add %s %s, 0\n", resultName, targetType, sourceValStr)
359+ }
360 } else {
361- fmt.Fprintf(b.out, "%s = add %s %s, 0\n", resultName, targetType, sourceValStr)
362+ // No conversion by prepareArg, so we need to cast
363+ sourceType := b.getType(instr.Args[0])
364+ if strings.HasSuffix(targetType, "*") {
365+ fmt.Fprintf(b.out, "%s = bitcast %s %s to %s\n", resultName, sourceType, sourceValStr, targetType)
366+ } else {
367+ fmt.Fprintf(b.out, "%s = add %s %s, 0\n", resultName, targetType, sourceValStr)
368+ }
369 }
370 }
371 b.tempTypes[resultName] = targetType
372@@ -387,22 +503,34 @@ func (b *llvmBackend) genInstr(instr *ir.Instruction) {
373 if isFloat {
374 opStr = "fcmp"
375 switch instr.Op {
376- case ir.OpCEq: predicate = "oeq"
377- case ir.OpCNeq: predicate = "one"
378- case ir.OpCLt: predicate = "olt"
379- case ir.OpCGt: predicate = "ogt"
380- case ir.OpCLe: predicate = "ole"
381- case ir.OpCGe: predicate = "oge"
382+ case ir.OpCEq:
383+ predicate = "oeq"
384+ case ir.OpCNeq:
385+ predicate = "one"
386+ case ir.OpCLt:
387+ predicate = "olt"
388+ case ir.OpCGt:
389+ predicate = "ogt"
390+ case ir.OpCLe:
391+ predicate = "ole"
392+ case ir.OpCGe:
393+ predicate = "oge"
394 }
395 } else {
396 opStr = "icmp"
397 switch instr.Op {
398- case ir.OpCEq: predicate = "eq"
399- case ir.OpCNeq: predicate = "ne"
400- case ir.OpCLt: predicate = "slt"
401- case ir.OpCGt: predicate = "sgt"
402- case ir.OpCLe: predicate = "sle"
403- case ir.OpCGe: predicate = "sge"
404+ case ir.OpCEq:
405+ predicate = "eq"
406+ case ir.OpCNeq:
407+ predicate = "ne"
408+ case ir.OpCLt:
409+ predicate = "slt"
410+ case ir.OpCGt:
411+ predicate = "sgt"
412+ case ir.OpCLe:
413+ predicate = "sle"
414+ case ir.OpCGe:
415+ predicate = "sge"
416 }
417 }
418
419@@ -446,7 +574,9 @@ func (b *llvmBackend) genInstr(instr *ir.Instruction) {
420 case ir.OpSWToF, ir.OpSLToF:
421 valType := b.formatType(instr.Typ)
422 srcType := b.wordType
423- if instr.Op == ir.OpSWToF { srcType = "i32" }
424+ if instr.Op == ir.OpSWToF {
425+ srcType = "i32"
426+ }
427 srcVal := b.prepareArg(instr.Args[0], srcType)
428 fmt.Fprintf(b.out, "%s = sitofp %s %s to %s\n", resultName, srcType, srcVal, valType)
429 b.tempTypes[resultName] = valType
430@@ -472,7 +602,9 @@ func (b *llvmBackend) genInstr(instr *ir.Instruction) {
431 srcType := b.getType(instr.Args[0])
432 srcVal := b.prepareArg(instr.Args[0], srcType)
433 castOp := "fptosi"
434- if instr.Op == ir.OpFToUI { castOp = "fptoui" }
435+ if instr.Op == ir.OpFToUI {
436+ castOp = "fptoui"
437+ }
438 fmt.Fprintf(b.out, "%s = %s %s %s to %s\n", resultName, castOp, srcType, srcVal, valType)
439 b.tempTypes[resultName] = valType
440
441@@ -482,13 +614,19 @@ func (b *llvmBackend) genInstr(instr *ir.Instruction) {
442 switch instr.Op {
443 case ir.OpExtSB, ir.OpExtUB:
444 srcType, castOp = "i8", "sext"
445- if instr.Op == ir.OpExtUB { castOp = "zext" }
446+ if instr.Op == ir.OpExtUB {
447+ castOp = "zext"
448+ }
449 case ir.OpExtSH, ir.OpExtUH:
450 srcType, castOp = "i16", "sext"
451- if instr.Op == ir.OpExtUH { castOp = "zext" }
452+ if instr.Op == ir.OpExtUH {
453+ castOp = "zext"
454+ }
455 case ir.OpExtSW, ir.OpExtUW:
456 srcType, castOp = "i32", "sext"
457- if instr.Op == ir.OpExtUW { castOp = "zext" }
458+ if instr.Op == ir.OpExtUW {
459+ castOp = "zext"
460+ }
461 }
462 srcVal := b.prepareArg(instr.Args[0], srcType)
463 fmt.Fprintf(b.out, "%s = %s %s %s to %s\n", resultName, castOp, srcType, srcVal, valType)
464@@ -519,18 +657,24 @@ func (b *llvmBackend) genPhi(instr *ir.Instruction) string {
465 }
466 }
467
468- if hasPtrInput && hasIntInput { phiType = "i8*" }
469+ if hasPtrInput && hasIntInput {
470+ phiType = "i8*"
471+ }
472
473 var pairs []string
474 for i := 0; i < len(instr.Args); i += 2 {
475 labelName := instr.Args[i].String()
476- if labelName == "start" { labelName = "entry" }
477+ if labelName == "start" {
478+ labelName = "entry"
479+ }
480 val := b.prepareArgForPhi(instr.Args[i+1], phiType)
481 pairs = append(pairs, fmt.Sprintf("[ %s, %%%s ]", val, labelName))
482 }
483
484 phiResultName := resultName
485- if phiType != originalResultType { phiResultName = b.newBackendTemp() }
486+ if phiType != originalResultType {
487+ phiResultName = b.newBackendTemp()
488+ }
489
490 fmt.Fprintf(b.out, "\t%s = phi %s %s\n", phiResultName, phiType, strings.Join(pairs, ", "))
491 b.tempTypes[phiResultName] = phiType
492@@ -546,14 +690,22 @@ func (b *llvmBackend) prepareArgForPhi(v ir.Value, targetType string) string {
493 valStr := b.formatValue(v)
494 currentType := b.getType(v)
495
496- if currentType == targetType || currentType == "unknown" { return valStr }
497+ if currentType == targetType || currentType == "unknown" {
498+ return valStr
499+ }
500
501 if c, isConst := v.(*ir.Const); isConst {
502- if strings.HasSuffix(targetType, "*") && c.Value == 0 { return "null" }
503- if strings.HasSuffix(targetType, "*") { return fmt.Sprintf("inttoptr (%s %s to %s)", currentType, valStr, targetType) }
504+ if strings.HasSuffix(targetType, "*") && c.Value == 0 {
505+ return "null"
506+ }
507+ if strings.HasSuffix(targetType, "*") {
508+ return fmt.Sprintf("inttoptr (%s %s to %s)", currentType, valStr, targetType)
509+ }
510 }
511
512- if _, isGlobal := v.(*ir.Global); isGlobal { return fmt.Sprintf("bitcast (%s %s to %s)", currentType, valStr, targetType) }
513+ if _, isGlobal := v.(*ir.Global); isGlobal {
514+ return fmt.Sprintf("bitcast (%s %s to %s)", currentType, valStr, targetType)
515+ }
516 return valStr
517 }
518
519@@ -580,7 +732,9 @@ func (b *llvmBackend) genAdd(instr *ir.Instruction) {
520 ptr, ptrType, offset = rhs, rhsType, lhs
521 }
522
523- if ptrType == "unknown" { ptrType = "i8*" }
524+ if ptrType == "unknown" {
525+ ptrType = "i8*"
526+ }
527
528 i8PtrVal := b.prepareArg(ptr, "i8*")
529 offsetVal := b.prepareArg(offset, b.wordType)
530@@ -627,24 +781,53 @@ func (b *llvmBackend) genAdd(instr *ir.Instruction) {
531 rhsVals := b.prepareArg(rhs, resultType)
532 fmt.Fprintf(b.out, "%s = add %s %s, %s\n", resultName, resultType, lhsVals, rhsVals)
533 b.tempTypes[resultName] = resultType
534+ b.tempIRTypes[resultName] = instr.Typ
535 }
536 }
537
538 func (b *llvmBackend) genCall(instr *ir.Instruction) {
539 resultName := ""
540- if instr.Result != nil { resultName = b.formatValue(instr.Result) }
541+ if instr.Result != nil {
542+ resultName = b.formatValue(instr.Result)
543+ }
544
545 callee := instr.Args[0]
546 calleeStr := b.formatValue(callee)
547 retType := b.getFuncSig(callee.String())
548
549+ // Check if this is an external function call (declared but not defined)
550+ isExternalFunc := false
551+ if g, ok := callee.(*ir.Global); ok {
552+ // Check if the function is not defined in this program (making it external)
553+ funcIsDefined := false
554+ for _, fn := range b.prog.Funcs {
555+ if fn.Name == g.Name {
556+ funcIsDefined = true
557+ break
558+ }
559+ }
560+ if !funcIsDefined {
561+ isExternalFunc = true
562+ }
563+ }
564+
565 var argParts []string
566 for i, arg := range instr.Args[1:] {
567 targetType := b.wordType
568 if instr.ArgTypes != nil && i < len(instr.ArgTypes) {
569- targetType = b.formatType(instr.ArgTypes[i])
570+ requestedType := b.formatType(instr.ArgTypes[i])
571+
572+ // For external functions, let the linker handle type conversions
573+ // Only do integer promotions for small integer types, preserve pointers and floats
574+ if isExternalFunc && (requestedType == "i8" || requestedType == "i16") {
575+ targetType = b.wordType
576+ } else {
577+ targetType = requestedType
578+ }
579 } else if g, ok := arg.(*ir.Global); ok {
580- if _, isString := b.prog.IsStringLabel(g.Name); isString { targetType = "i8*" }
581+ if _, isString := b.prog.IsStringLabel(g.Name); isString {
582+ targetType = "i8*"
583+ }
584 }
585 valStr := b.prepareArg(arg, targetType)
586 argParts = append(argParts, fmt.Sprintf("%s %s", targetType, valStr))
587@@ -681,58 +864,111 @@ func (b *llvmBackend) prepareArg(v ir.Value, targetType string) string {
588 }
589 }
590
591- if _, ok := v.(*ir.Const); ok { return valStr }
592- if _, ok := v.(*ir.FloatConst); ok { return valStr }
593+ if _, ok := v.(*ir.Const); ok {
594+ return valStr
595+ }
596+ if _, ok := v.(*ir.FloatConst); ok {
597+ return valStr
598+ }
599
600 currentType := b.getType(v)
601- if currentType == targetType || currentType == "unknown" { return valStr }
602+ if currentType == targetType || currentType == "unknown" {
603+ return valStr
604+ }
605
606+ // Get the IR type to determine signedness for casting
607+ sourceIRType := b.getIRType(v)
608 castTemp := b.newBackendTemp()
609 b.out.WriteString("\t")
610- b.out.WriteString(b.formatCast(valStr, castTemp, currentType, targetType))
611+ b.out.WriteString(b.formatCastWithSignedness(valStr, castTemp, currentType, targetType, sourceIRType))
612 b.out.WriteString("\n")
613 b.tempTypes[castTemp] = targetType
614 return castTemp
615 }
616
617 func (b *llvmBackend) formatCast(sourceName, targetName, sourceType, targetType string) string {
618+ return b.formatCastWithSignedness(sourceName, targetName, sourceType, targetType, ir.TypeNone)
619+}
620+
621+func (b *llvmBackend) formatCastWithSignedness(sourceName, targetName, sourceType, targetType string, sourceIRType ir.Type) string {
622 isSourcePtr, isTargetPtr := strings.HasSuffix(sourceType, "*"), strings.HasSuffix(targetType, "*")
623 isSourceInt, isTargetInt := strings.HasPrefix(sourceType, "i") && !isSourcePtr, strings.HasPrefix(targetType, "i") && !isTargetPtr
624 isSourceFloat, isTargetFloat := sourceType == "float" || sourceType == "double", targetType == "float" || targetType == "double"
625
626 var castOp string
627 switch {
628- case sourceType == "i1" && isTargetInt: castOp = "zext"
629- case isSourceInt && targetType == "i1": return fmt.Sprintf("%s = icmp ne %s %s, 0", targetName, sourceType, sourceName)
630- case isSourceInt && isTargetPtr: castOp = "inttoptr"
631- case isSourcePtr && isTargetInt: castOp = "ptrtoint"
632- case isSourcePtr && isTargetPtr: castOp = "bitcast"
633+ case sourceType == "i1" && isTargetInt:
634+ castOp = "zext"
635+ case isSourceInt && targetType == "i1":
636+ return fmt.Sprintf("%s = icmp ne %s %s, 0", targetName, sourceType, sourceName)
637+ case isSourceInt && isTargetPtr:
638+ castOp = "inttoptr"
639+ case isSourcePtr && isTargetInt:
640+ castOp = "ptrtoint"
641+ case isSourcePtr && isTargetPtr:
642+ castOp = "bitcast"
643 case isSourceInt && isTargetInt:
644 sourceBits, _ := strconv.Atoi(strings.TrimPrefix(sourceType, "i"))
645 targetBits, _ := strconv.Atoi(strings.TrimPrefix(targetType, "i"))
646- castOp = "sext"
647- if sourceBits > targetBits { castOp = "trunc" }
648- case isSourceInt && isTargetFloat: castOp = "sitofp"
649- case isSourceFloat && isTargetInt: castOp = "fptosi"
650+ if sourceBits > targetBits {
651+ castOp = "trunc"
652+ } else {
653+ // Choose sext vs zext based on source IR type signedness
654+ switch sourceIRType {
655+ case ir.TypeUB, ir.TypeUH:
656+ castOp = "zext" // unsigned types get zero extension
657+ default:
658+ castOp = "sext" // signed types (and ambiguous ones) get sign extension
659+ }
660+ }
661+ case isSourceInt && isTargetFloat:
662+ castOp = "sitofp"
663+ case isSourceFloat && isTargetInt:
664+ castOp = "fptosi"
665 case isSourceFloat && isTargetFloat:
666 castOp = "fpext"
667- if sourceType == "double" { castOp = "fptrunc" }
668- default: castOp = "bitcast"
669+ if sourceType == "double" {
670+ castOp = "fptrunc"
671+ }
672+ default:
673+ castOp = "bitcast"
674 }
675 return fmt.Sprintf("%s = %s %s %s to %s", targetName, castOp, sourceType, sourceName, targetType)
676 }
677
678 func (b *llvmBackend) getType(v ir.Value) string {
679 valStr := b.formatValue(v)
680- if t, ok := b.tempTypes[valStr]; ok { return t }
681- if _, ok := v.(*ir.Const); ok { return b.wordType }
682- if fc, ok := v.(*ir.FloatConst); ok { return b.formatType(fc.Typ) }
683+ if t, ok := b.tempTypes[valStr]; ok {
684+ return t
685+ }
686+ if _, ok := v.(*ir.Const); ok {
687+ return b.wordType
688+ }
689+ if fc, ok := v.(*ir.FloatConst); ok {
690+ return b.formatType(fc.Typ)
691+ }
692 if g, ok := v.(*ir.Global); ok {
693- if _, isString := b.prog.IsStringLabel(g.Name); isString { return "i8*" }
694+ if _, isString := b.prog.IsStringLabel(g.Name); isString {
695+ return "i8*"
696+ }
697 }
698 return "unknown"
699 }
700
701+func (b *llvmBackend) getIRType(v ir.Value) ir.Type {
702+ valStr := b.formatValue(v)
703+ if t, ok := b.tempIRTypes[valStr]; ok {
704+ return t
705+ }
706+ if _, ok := v.(*ir.Const); ok {
707+ return ir.GetType(nil, b.prog.WordSize)
708+ }
709+ if fc, ok := v.(*ir.FloatConst); ok {
710+ return fc.Typ
711+ }
712+ return ir.TypeNone
713+}
714+
715 func (b *llvmBackend) newBackendTemp() string {
716 name := fmt.Sprintf("%%.b%d", b.prog.GetBackendTempCount())
717 b.prog.IncBackendTempCount()
718@@ -740,92 +976,127 @@ func (b *llvmBackend) newBackendTemp() string {
719 }
720
721 func (b *llvmBackend) formatValue(v ir.Value) string {
722- if v == nil { return "void" }
723+ if v == nil {
724+ return "void"
725+ }
726 switch val := v.(type) {
727- case *ir.Const: return fmt.Sprintf("%d", val.Value)
728+ case *ir.Const:
729+ return fmt.Sprintf("%d", val.Value)
730 case *ir.FloatConst:
731 if val.Typ == ir.TypeS {
732+ // For 32-bit floats, truncate to float32 precision then expand back to float64 for hex format
733 float32Val := float32(val.Value)
734- float64Val := float64(float32Val)
735- return fmt.Sprintf("0x%016X", math.Float64bits(float64Val))
736+ return fmt.Sprintf("0x%016X", math.Float64bits(float64(float32Val)))
737 } else {
738 return fmt.Sprintf("0x%016X", math.Float64bits(val.Value))
739 }
740- case *ir.Global: return "@" + val.Name
741+ case *ir.Global:
742+ return "@" + val.Name
743 case *ir.Temporary:
744 safeName := strings.NewReplacer(".", "_", "[", "_", "]", "_").Replace(val.Name)
745- if val.ID == -1 { return "%" + safeName }
746- if safeName != "" { return fmt.Sprintf("%%.%s_%d", safeName, val.ID) }
747+ if val.ID == -1 {
748+ return "%" + safeName
749+ }
750+ if safeName != "" {
751+ return fmt.Sprintf("%%.%s_%d", safeName, val.ID)
752+ }
753 return fmt.Sprintf("%%t%d", val.ID)
754- case *ir.Label: return "%" + val.Name
755- case *ir.CastValue: return b.formatValue(val.Value)
756- default: return ""
757+ case *ir.Label:
758+ return "%" + val.Name
759+ case *ir.CastValue:
760+ return b.formatValue(val.Value)
761+ default:
762+ return ""
763 }
764 }
765
766 func (b *llvmBackend) formatType(t ir.Type) string {
767 switch t {
768- case ir.TypeB: return "i8"
769- case ir.TypeH: return "i16"
770- case ir.TypeW: return "i32"
771- case ir.TypeL: return "i64"
772- case ir.TypeS: return "float"
773- case ir.TypeD: return "double"
774- case ir.TypeNone: return "void"
775- case ir.TypePtr: return "i8*"
776- default: return b.wordType
777+ case ir.TypeB, ir.TypeSB, ir.TypeUB:
778+ return "i8"
779+ case ir.TypeH, ir.TypeSH, ir.TypeUH:
780+ return "i16"
781+ case ir.TypeW:
782+ return "i32"
783+ case ir.TypeL:
784+ return "i64"
785+ case ir.TypeS:
786+ return "float"
787+ case ir.TypeD:
788+ return "double"
789+ case ir.TypeNone:
790+ return "void"
791+ case ir.TypePtr:
792+ return "i8*"
793+ default:
794+ return b.wordType
795 }
796 }
797
798 func (b *llvmBackend) formatOp(op ir.Op) (string, string) {
799 switch op {
800- case ir.OpAdd: return "add", ""
801- case ir.OpSub: return "sub", ""
802- case ir.OpMul: return "mul", ""
803- case ir.OpDiv: return "sdiv", ""
804- case ir.OpRem: return "srem", ""
805- case ir.OpAddF: return "fadd", ""
806- case ir.OpSubF: return "fsub", ""
807- case ir.OpMulF: return "fmul", ""
808- case ir.OpDivF: return "fdiv", ""
809- case ir.OpRemF: return "frem", ""
810- case ir.OpNegF: return "fneg", ""
811- case ir.OpAnd: return "and", ""
812- case ir.OpOr: return "or", ""
813- case ir.OpXor: return "xor", ""
814- case ir.OpShl: return "shl", ""
815- case ir.OpShr: return "ashr", ""
816- case ir.OpCEq: return "icmp", "eq"
817- case ir.OpCNeq: return "icmp", "ne"
818- case ir.OpCLt: return "icmp", "slt"
819- case ir.OpCGt: return "icmp", "sgt"
820- case ir.OpCLe: return "icmp", "sle"
821- case ir.OpCGe: return "icmp", "sge"
822- default: return "unknown_op", ""
823- }
824-}
825-
826-func (b *llvmBackend) escapeString(s string) string {
827- var sb strings.Builder
828- for _, byteVal := range []byte(s) {
829- if byteVal < 32 || byteVal > 126 || byteVal == '"' || byteVal == '\\' {
830- sb.WriteString(fmt.Sprintf("\\%02X", byteVal))
831- } else {
832- sb.WriteByte(byteVal)
833- }
834+ case ir.OpAdd:
835+ return "add", ""
836+ case ir.OpSub:
837+ return "sub", ""
838+ case ir.OpMul:
839+ return "mul", ""
840+ case ir.OpDiv:
841+ return "sdiv", ""
842+ case ir.OpRem:
843+ return "srem", ""
844+ case ir.OpAddF:
845+ return "fadd", ""
846+ case ir.OpSubF:
847+ return "fsub", ""
848+ case ir.OpMulF:
849+ return "fmul", ""
850+ case ir.OpDivF:
851+ return "fdiv", ""
852+ case ir.OpRemF:
853+ return "frem", ""
854+ case ir.OpNegF:
855+ return "fneg", ""
856+ case ir.OpAnd:
857+ return "and", ""
858+ case ir.OpOr:
859+ return "or", ""
860+ case ir.OpXor:
861+ return "xor", ""
862+ case ir.OpShl:
863+ return "shl", ""
864+ case ir.OpShr:
865+ return "ashr", ""
866+ case ir.OpCEq:
867+ return "icmp", "eq"
868+ case ir.OpCNeq:
869+ return "icmp", "ne"
870+ case ir.OpCLt:
871+ return "icmp", "slt"
872+ case ir.OpCGt:
873+ return "icmp", "sgt"
874+ case ir.OpCLe:
875+ return "icmp", "sle"
876+ case ir.OpCGe:
877+ return "icmp", "sge"
878+ default:
879+ return "unknown_op", ""
880 }
881- return sb.String()
882 }
883
884 func (b *llvmBackend) isPointerValue(v ir.Value) bool {
885 if g, ok := v.(*ir.Global); ok {
886- if _, isString := b.prog.IsStringLabel(g.Name); isString { return true }
887+ if _, isString := b.prog.IsStringLabel(g.Name); isString {
888+ return true
889+ }
890 return b.prog.FindFunc(g.Name) == nil && b.funcSigs[g.Name] == ""
891 }
892 return false
893 }
894
895 func (b *llvmBackend) prepareArgForComparison(v ir.Value, targetType string) string {
896- if c, isConst := v.(*ir.Const); isConst && c.Value == 0 && strings.HasSuffix(targetType, "*") { return "null" }
897+ if c, isConst := v.(*ir.Const); isConst && c.Value == 0 && strings.HasSuffix(targetType, "*") {
898+ return "null"
899+ }
900 return b.prepareArg(v, targetType)
901 }
+371,
-109
1@@ -3,7 +3,6 @@ package codegen
2 import (
3 "bytes"
4 "fmt"
5- "strconv"
6 "strings"
7
8 "github.com/xplshn/gbc/pkg/ast"
9@@ -12,29 +11,34 @@ import (
10 "modernc.org/libqbe"
11 )
12
13-type qbeBackend struct {
14+type qbeBackend struct{
15 out *strings.Builder
16 prog *ir.Program
17 currentFn *ir.Func
18 structTypes map[string]bool
19+ extCounter int
20 }
21
22 func NewQBEBackend() Backend { return &qbeBackend{structTypes: make(map[string]bool)} }
23
24 func (b *qbeBackend) Generate(prog *ir.Program, cfg *config.Config) (*bytes.Buffer, error) {
25+ qbeIR, err := b.GenerateIR(prog, cfg)
26+ if err != nil { return nil, err }
27+
28+ var asmBuf bytes.Buffer
29+ err = libqbe.Main(cfg.BackendTarget, "input.ssa", strings.NewReader(qbeIR), &asmBuf, nil)
30+ if err != nil { return nil, fmt.Errorf("\n--- QBE Compilation Failed ---\nGenerated IR:\n%s\n\nlibqbe error: %w", qbeIR, err) }
31+ return &asmBuf, nil
32+}
33+
34+func (b *qbeBackend) GenerateIR(prog *ir.Program, cfg *config.Config) (string, error) {
35 var qbeIRBuilder strings.Builder
36 b.out = &qbeIRBuilder
37 b.prog = prog
38
39 b.gen()
40
41- qbeIR := qbeIRBuilder.String()
42- var asmBuf bytes.Buffer
43- err := libqbe.Main(cfg.BackendTarget, "input.ssa", strings.NewReader(qbeIR), &asmBuf, nil)
44- if err != nil {
45- return nil, fmt.Errorf("\n--- QBE Compilation Failed ---\nGenerated IR:\n%s\n\nlibqbe error: %w", qbeIR, err)
46- }
47- return &asmBuf, nil
48+ return qbeIRBuilder.String(), nil
49 }
50
51 func (b *qbeBackend) gen() {
52@@ -47,8 +51,18 @@ func (b *qbeBackend) gen() {
53 if len(b.prog.Strings) > 0 {
54 b.out.WriteString("\n")
55 for s, label := range b.prog.Strings {
56- escaped := strconv.Quote(s)
57- fmt.Fprintf(b.out, "data $%s = { b %s, b 0 }\n", label, escaped)
58+ b.out.WriteString(fmt.Sprintf("data $%s = { ", label))
59+
60+ if len(s) == 0 {
61+ b.out.WriteString("b 0 }\n")
62+ continue
63+ }
64+
65+ for i := 0; i < len(s); i++ {
66+ if i > 0 { b.out.WriteString(", ") }
67+ b.out.WriteString(fmt.Sprintf("b %d", s[i]))
68+ }
69+ b.out.WriteString(", b 0 }\n")
70 }
71 }
72
73@@ -65,10 +79,8 @@ func (b *qbeBackend) formatFieldType(t *ast.BxType) (string, bool) {
74 if _, defined := b.structTypes[t.Name]; defined { return ":" + t.Name, true }
75 }
76 return "", false
77- case ast.TYPE_POINTER, ast.TYPE_ARRAY:
78- return b.formatType(ir.GetType(nil, b.prog.WordSize)), true
79- default:
80- return b.formatType(ir.GetType(t, b.prog.WordSize)), true
81+ case ast.TYPE_POINTER, ast.TYPE_ARRAY: return b.formatType(ir.GetType(nil, b.prog.WordSize)), true
82+ default: return b.formatType(ir.GetType(t, b.prog.WordSize)), true
83 }
84 }
85
86@@ -77,7 +89,9 @@ func (b *qbeBackend) genStructTypes() {
87
88 var collect func(t *ast.BxType)
89 collect = func(t *ast.BxType) {
90- if t == nil { return }
91+ if t == nil {
92+ return
93+ }
94 if t.Kind == ast.TYPE_STRUCT {
95 if _, exists := allStructs[t.Name]; !exists && t.Name != "" {
96 allStructs[t.Name] = t
97@@ -114,14 +128,18 @@ func (b *qbeBackend) genStructTypes() {
98 }
99 }
100
101- if len(allStructs) == 0 { return }
102+ if len(allStructs) == 0 {
103+ return
104+ }
105
106 b.out.WriteString("\n")
107 definedCount := -1
108 for len(b.structTypes) < len(allStructs) && len(b.structTypes) != definedCount {
109 definedCount = len(b.structTypes)
110 for name, typ := range allStructs {
111- if b.structTypes[name] { continue }
112+ if b.structTypes[name] {
113+ continue
114+ }
115
116 var fieldTypes []string
117 canDefine := true
118@@ -145,7 +163,9 @@ func (b *qbeBackend) genStructTypes() {
119
120 func (b *qbeBackend) genGlobal(g *ir.Data) {
121 alignStr := ""
122- if g.Align > 0 { alignStr = fmt.Sprintf("align %d ", g.Align) }
123+ if g.Align > 0 {
124+ alignStr = fmt.Sprintf("align %d ", g.Align)
125+ }
126
127 fmt.Fprintf(b.out, "data $%s = %s{ ", g.Name, alignStr)
128 for i, item := range g.Items {
129@@ -158,7 +178,9 @@ func (b *qbeBackend) genGlobal(g *ir.Data) {
130 } else {
131 fmt.Fprintf(b.out, "%s %s", b.formatType(item.Typ), b.formatValue(item.Value))
132 }
133- if i < len(g.Items)-1 { b.out.WriteString(", ") }
134+ if i < len(g.Items)-1 {
135+ b.out.WriteString(", ")
136+ }
137 }
138 b.out.WriteString(" }\n")
139 }
140@@ -170,7 +192,9 @@ func (b *qbeBackend) genFunc(fn *ir.Func) {
141 retTypeStr = " :" + fn.AstReturnType.Name
142 } else {
143 retTypeStr = b.formatType(fn.ReturnType)
144- if retTypeStr != "" { retTypeStr = " " + retTypeStr }
145+ if retTypeStr != "" {
146+ retTypeStr = " " + retTypeStr
147+ }
148 }
149
150 fmt.Fprintf(b.out, "\nexport function%s $%s(", retTypeStr, fn.Name)
151@@ -181,11 +205,15 @@ func (b *qbeBackend) genFunc(fn *ir.Func) {
152 paramType = ir.GetType(nil, b.prog.WordSize)
153 }
154 fmt.Fprintf(b.out, "%s %s", b.formatType(paramType), b.formatValue(p.Val))
155- if i < len(fn.Params)-1 { b.out.WriteString(", ") }
156+ if i < len(fn.Params)-1 {
157+ b.out.WriteString(", ")
158+ }
159 }
160
161 if fn.HasVarargs {
162- if len(fn.Params) > 0 { b.out.WriteString(", ") }
163+ if len(fn.Params) > 0 {
164+ b.out.WriteString(", ")
165+ }
166 b.out.WriteString("...")
167 }
168 b.out.WriteString(") {\n")
169@@ -211,19 +239,39 @@ func (b *qbeBackend) genInstr(instr *ir.Instruction) {
170 return
171 }
172
173+ // Handle special case for byte arithmetic operations
174+ isArithmetic := (instr.Op >= ir.OpAdd && instr.Op <= ir.OpShr) || (instr.Op >= ir.OpAddF && instr.Op <= ir.OpNegF)
175+ if isArithmetic && (instr.Typ == ir.TypeB || instr.Typ == ir.TypeSB || instr.Typ == ir.TypeUB) {
176+ // For byte arithmetic, generate as word arithmetic with appropriate conversions
177+ b.genByteArithmetic(instr)
178+ return
179+ }
180+
181+ // Handle special case for float arithmetic operations requiring type conversion
182+ isFloatArithmetic := instr.Op >= ir.OpAddF && instr.Op <= ir.OpRemF
183+ if isFloatArithmetic && instr.Typ == ir.TypeD {
184+ // For double precision float arithmetic, generate with appropriate conversions
185+ b.genFloatArithmetic(instr)
186+ return
187+ }
188+
189 b.out.WriteString("\t")
190 if instr.Result != nil {
191 resultType := instr.Typ
192 isComparison := instr.Op >= ir.OpCEq && instr.Op <= ir.OpCGe
193
194- if isComparison { resultType = ir.GetType(nil, b.prog.WordSize) }
195+ if isComparison {
196+ resultType = ir.GetType(nil, b.prog.WordSize)
197+ }
198
199- if instr.Op == ir.OpLoad && (instr.Typ == ir.TypeB || instr.Typ == ir.TypeH) {
200+ // In QBE, temporaries can only have base types. On 64-bit systems, promote sub-word types to long (l)
201+ if instr.Op == ir.OpLoad && b.isSubWordType(instr.Typ) {
202 resultType = ir.GetType(nil, b.prog.WordSize)
203 }
204
205- if instr.Op == ir.OpCast && (resultType == ir.TypeB || resultType == ir.TypeH) {
206- resultType = ir.TypeW
207+ // For cast operations, ensure result types are base types
208+ if instr.Op == ir.OpCast && b.isSubWordType(resultType) {
209+ resultType = ir.GetType(nil, b.prog.WordSize)
210 }
211
212 fmt.Fprintf(b.out, "%s =%s ", b.formatValue(instr.Result), b.formatType(resultType))
213@@ -235,157 +283,371 @@ func (b *qbeBackend) genInstr(instr *ir.Instruction) {
214 if instr.Op == ir.OpPhi {
215 for i := 0; i < len(instr.Args); i += 2 {
216 fmt.Fprintf(b.out, " @%s %s", instr.Args[i].String(), b.formatValue(instr.Args[i+1]))
217- if i+2 < len(instr.Args) { b.out.WriteString(",") }
218+ if i+2 < len(instr.Args) {
219+ b.out.WriteString(",")
220+ }
221 }
222 } else {
223 for i, arg := range instr.Args {
224 b.out.WriteString(" ")
225- if arg != nil { b.out.WriteString(b.formatValue(arg)) }
226- if i < len(instr.Args)-1 { b.out.WriteString(",") }
227+ if arg != nil {
228+ b.out.WriteString(b.formatValue(arg))
229+ }
230+ if i < len(instr.Args)-1 {
231+ b.out.WriteString(",")
232+ }
233+ }
234+ }
235+ b.out.WriteString("\n")
236+}
237+
238+func (b *qbeBackend) genFloatArithmetic(instr *ir.Instruction) {
239+ // For float arithmetic operations, handle operands appropriately
240+ resultType := instr.Typ
241+
242+ // Build arguments - no extension needed if operands match result type
243+ var args []string
244+ for _, arg := range instr.Args {
245+ if arg == nil {
246+ args = append(args, "")
247+ continue
248+ }
249+
250+ if floatConst, ok := arg.(*ir.FloatConst); ok {
251+ // Float constants can be directly used with the proper format
252+ if resultType == ir.TypeD {
253+ args = append(args, fmt.Sprintf("d_%f", floatConst.Value))
254+ } else {
255+ args = append(args, b.formatValue(arg))
256+ }
257+ } else {
258+ // For temporaries, use directly - the type system ensures consistency
259+ args = append(args, b.formatValue(arg))
260+ }
261+ }
262+
263+ // Generate the arithmetic instruction
264+ b.out.WriteString("\t")
265+ if instr.Result != nil {
266+ fmt.Fprintf(b.out, "%s =%s ", b.formatValue(instr.Result), b.formatType(resultType))
267+ }
268+
269+ opStr, _ := b.formatOp(instr)
270+ b.out.WriteString(opStr)
271+
272+ for i, argStr := range args {
273+ b.out.WriteString(" " + argStr)
274+ if i < len(args)-1 {
275+ b.out.WriteString(",")
276 }
277 }
278 b.out.WriteString("\n")
279 }
280
281+func (b *qbeBackend) genByteArithmetic(instr *ir.Instruction) {
282+ // For byte arithmetic operations, we need to handle operand type conversion
283+ // Generate intermediate temporaries with proper extensions when needed
284+ resultType := ir.GetType(nil, b.prog.WordSize) // long on 64-bit
285+
286+ // Convert operands to the target type by generating extension instructions
287+ var convertedArgs []string
288+ for _, arg := range instr.Args {
289+ if arg == nil {
290+ convertedArgs = append(convertedArgs, "")
291+ continue
292+ }
293+
294+ if const_arg, ok := arg.(*ir.Const); ok {
295+ // Constants can be directly used with the right value
296+ convertedArgs = append(convertedArgs, fmt.Sprintf("%d", const_arg.Value))
297+ } else {
298+ // For temporaries, we need to generate an extension instruction
299+ extTemp := fmt.Sprintf("%%ext_%d", b.extCounter)
300+ b.extCounter++
301+ // Generate extension instruction: extTemp =l extsw arg (word to long)
302+ b.out.WriteString(fmt.Sprintf("\t%s =%s extsw %s\n",
303+ extTemp, b.formatType(resultType), b.formatValue(arg)))
304+ convertedArgs = append(convertedArgs, extTemp)
305+ }
306+ }
307+
308+ // Now generate the actual arithmetic instruction with converted operands
309+ b.out.WriteString("\t")
310+ if instr.Result != nil {
311+ fmt.Fprintf(b.out, "%s =%s ", b.formatValue(instr.Result), b.formatType(resultType))
312+ }
313+
314+ opStr, _ := b.formatOp(instr)
315+ b.out.WriteString(opStr)
316+
317+ for i, argStr := range convertedArgs {
318+ b.out.WriteString(" " + argStr)
319+ if i < len(convertedArgs)-1 {
320+ b.out.WriteString(",")
321+ }
322+ }
323+ b.out.WriteString("\n")
324+}
325+
326+// isSubWordType checks if a type needs promotion for QBE function calls
327+func (b *qbeBackend) isSubWordType(t ir.Type) bool {
328+ return t == ir.TypeB || t == ir.TypeSB || t == ir.TypeUB ||
329+ t == ir.TypeH || t == ir.TypeSH || t == ir.TypeUH || t == ir.TypeW
330+}
331+
332 func (b *qbeBackend) genCall(instr *ir.Instruction) {
333 callee := instr.Args[0]
334 calleeName := ""
335- if g, ok := callee.(*ir.Global); ok { calleeName = g.Name }
336+ if g, ok := callee.(*ir.Global); ok {
337+ calleeName = g.Name
338+ }
339+
340+ // Pre-generate all needed extension instructions
341+ var processedArgs []struct {
342+ value string
343+ targetType ir.Type
344+ }
345+
346+ for i, arg := range instr.Args[1:] {
347+ argType := ir.GetType(nil, b.prog.WordSize)
348+ if instr.ArgTypes != nil && i < len(instr.ArgTypes) {
349+ argType = instr.ArgTypes[i]
350+ }
351+
352+ argValue := b.formatValue(arg)
353+ targetType := argType
354+
355+ // Promote sub-word types to target word size and generate extension if needed
356+ if b.isSubWordType(argType) {
357+ targetType = ir.GetType(nil, b.prog.WordSize)
358+ if argType != targetType {
359+ extTemp := fmt.Sprintf("%%ext_%d", b.extCounter)
360+ b.extCounter++
361+
362+ // Select extension operation based on source type
363+ var extOp string
364+ switch argType {
365+ case ir.TypeW: extOp = "extsw"
366+ case ir.TypeUB: extOp = "extub"
367+ case ir.TypeSB: extOp = "extsb"
368+ case ir.TypeUH: extOp = "extuh"
369+ case ir.TypeSH: extOp = "extsh"
370+ default:
371+ extOp = "extub" // Default for ambiguous b/h types
372+ }
373+
374+ // Generate extension instruction before the call
375+ fmt.Fprintf(b.out, "\t%s =%s %s %s\n", extTemp, b.formatType(targetType), extOp, argValue)
376+ argValue = extTemp
377+ }
378+ }
379+
380+ processedArgs = append(processedArgs, struct {
381+ value string
382+ targetType ir.Type
383+ }{argValue, targetType})
384+ }
385
386+ // Generate result assignment if needed
387 if instr.Result != nil {
388 var retTypeStr string
389 calledFunc := b.prog.FindFunc(calleeName)
390 if calledFunc != nil && calledFunc.AstReturnType != nil && calledFunc.AstReturnType.Kind == ast.TYPE_STRUCT {
391 retTypeStr = " :" + calledFunc.AstReturnType.Name
392 } else {
393- actualReturnType := instr.Typ
394- if len(instr.ArgTypes) > 0 {
395- argType := instr.ArgTypes[0]
396- if argType == ir.TypeS || argType == ir.TypeD {
397- switch calleeName {
398- case "sqrt", "sin", "cos", "fabs":
399- actualReturnType = argType
400- }
401- }
402- }
403- retTypeStr = b.formatType(actualReturnType)
404+ retTypeStr = b.formatType(instr.Typ)
405 }
406- fmt.Fprintf(b.out, "%s =%s ", b.formatValue(instr.Result), retTypeStr)
407+ fmt.Fprintf(b.out, "\t%s =%s ", b.formatValue(instr.Result), retTypeStr)
408+ } else {
409+ b.out.WriteString("\t")
410 }
411
412+ // Generate call with processed arguments
413 fmt.Fprintf(b.out, "call %s(", b.formatValue(callee))
414-
415- for i, arg := range instr.Args[1:] {
416- argType := ir.GetType(nil, b.prog.WordSize)
417- if instr.ArgTypes != nil && i < len(instr.ArgTypes) {
418- argType = instr.ArgTypes[i]
419- }
420- if argType == ir.TypeB || argType == ir.TypeH {
421- argType = ir.GetType(nil, b.prog.WordSize)
422+ for i, arg := range processedArgs {
423+ if i > 0 {
424+ b.out.WriteString(", ")
425 }
426-
427- fmt.Fprintf(b.out, "%s %s", b.formatType(argType), b.formatValue(arg))
428- if i < len(instr.Args)-2 { b.out.WriteString(", ") }
429+ fmt.Fprintf(b.out, "%s %s", b.formatType(arg.targetType), arg.value)
430 }
431 b.out.WriteString(")\n")
432 }
433
434 func (b *qbeBackend) formatValue(v ir.Value) string {
435- if v == nil { return "" }
436+ if v == nil {
437+ return ""
438+ }
439 switch val := v.(type) {
440 case *ir.Const: return fmt.Sprintf("%d", val.Value)
441- case *ir.FloatConst: return fmt.Sprintf("%s_%f", b.formatType(val.Typ), val.Value)
442- case *ir.Global: return "$" + val.Name
443+ case *ir.FloatConst:
444+ if val.Typ == ir.TypeS {
445+ // For 32-bit floats, truncate to float32 precision first
446+ float32Val := float32(val.Value)
447+ return fmt.Sprintf("s_%f", float64(float32Val))
448+ }
449+ return fmt.Sprintf("%s_%f", b.formatType(val.Typ), val.Value)
450+ case *ir.Global:
451+ return "$" + val.Name
452 case *ir.Temporary:
453 safeName := strings.NewReplacer(".", "_", "[", "_", "]", "_").Replace(val.Name)
454- if val.ID == -1 { return "%" + safeName }
455- if safeName != "" { return fmt.Sprintf("%%.%s_%d", safeName, val.ID) }
456+ if val.ID == -1 {
457+ return "%" + safeName
458+ }
459+ if safeName != "" {
460+ return fmt.Sprintf("%%.%s_%d", safeName, val.ID)
461+ }
462 return fmt.Sprintf("%%t%d", val.ID)
463- case *ir.Label: return "@" + val.Name
464- default: return ""
465+ case *ir.Label:
466+ return "@" + val.Name
467+ default:
468+ return ""
469 }
470 }
471
472 func (b *qbeBackend) formatType(t ir.Type) string {
473 switch t {
474- case ir.TypeB: return "b"
475- case ir.TypeH: return "h"
476- case ir.TypeW: return "w"
477- case ir.TypeL: return "l"
478- case ir.TypeS: return "s"
479- case ir.TypeD: return "d"
480- case ir.TypePtr: return b.formatType(ir.GetType(nil, b.prog.WordSize))
481- default: return ""
482+ case ir.TypeB, ir.TypeSB, ir.TypeUB:
483+ return "b"
484+ case ir.TypeH, ir.TypeSH, ir.TypeUH:
485+ return "h"
486+ case ir.TypeW:
487+ return "w"
488+ case ir.TypeL:
489+ return "l"
490+ case ir.TypeS:
491+ return "s"
492+ case ir.TypeD:
493+ return "d"
494+ case ir.TypePtr:
495+ return b.formatType(ir.GetType(nil, b.prog.WordSize))
496+ default:
497+ return ""
498 }
499 }
500
501 func (b *qbeBackend) getCmpInstType(argType ir.Type) string {
502- if argType == ir.TypeB || argType == ir.TypeH { return b.formatType(ir.GetType(nil, b.prog.WordSize)) }
503+ if b.isSubWordType(argType) {
504+ return b.formatType(ir.GetType(nil, b.prog.WordSize))
505+ }
506 return b.formatType(argType)
507 }
508
509 func (b *qbeBackend) formatOp(instr *ir.Instruction) (opStr string, isCall bool) {
510 typ := instr.Typ
511 argType := instr.OperandType
512- if argType == ir.TypeNone { argType = instr.Typ }
513+ if argType == ir.TypeNone {
514+ argType = instr.Typ
515+ }
516
517 typeStr := b.formatType(typ)
518 argTypeStr := b.getCmpInstType(argType)
519
520 switch instr.Op {
521 case ir.OpAlloc:
522- if instr.Align <= 4 { return "alloc4", false }
523- if instr.Align <= 8 { return "alloc8", false }
524+ if instr.Align <= 4 {
525+ return "alloc4", false
526+ }
527+ if instr.Align <= 8 {
528+ return "alloc8", false
529+ }
530 return "alloc16", false
531 case ir.OpLoad:
532 switch typ {
533- case ir.TypeB: return "loadub", false
534- case ir.TypeH: return "loaduh", false
535- case ir.TypePtr: return "load" + b.formatType(ir.GetType(nil, b.prog.WordSize)), false
536- default: return "load" + typeStr, false
537- }
538- case ir.OpStore: return "store" + typeStr, false
539- case ir.OpBlit: return "blit", false
540- case ir.OpAdd, ir.OpAddF: return "add", false
541- case ir.OpSub, ir.OpSubF: return "sub", false
542- case ir.OpMul, ir.OpMulF: return "mul", false
543- case ir.OpDiv, ir.OpDivF: return "div", false
544- case ir.OpRem, ir.OpRemF: return "rem", false
545- case ir.OpAnd: return "and", false
546- case ir.OpOr: return "or", false
547- case ir.OpXor: return "xor", false
548- case ir.OpShl: return "shl", false
549- case ir.OpShr: return "shr", false
550- case ir.OpNegF: return "neg", false
551- case ir.OpCEq: return "ceq" + argTypeStr, false
552- case ir.OpCNeq: return "cne" + argTypeStr, false
553+ case ir.TypeB:
554+ return "loadub", false // ambiguous, default to unsigned
555+ case ir.TypeSB:
556+ return "loadsb", false // signed byte
557+ case ir.TypeUB:
558+ return "loadub", false // unsigned byte
559+ case ir.TypeH:
560+ return "loaduh", false // ambiguous, default to unsigned
561+ case ir.TypeSH:
562+ return "loadsh", false // signed half
563+ case ir.TypeUH:
564+ return "loaduh", false // unsigned half
565+ case ir.TypePtr:
566+ return "load" + b.formatType(ir.GetType(nil, b.prog.WordSize)), false
567+ default:
568+ return "load" + typeStr, false
569+ }
570+ case ir.OpStore:
571+ return "store" + typeStr, false
572+ case ir.OpBlit:
573+ return "blit", false
574+ case ir.OpAdd, ir.OpAddF:
575+ return "add", false
576+ case ir.OpSub, ir.OpSubF:
577+ return "sub", false
578+ case ir.OpMul, ir.OpMulF:
579+ return "mul", false
580+ case ir.OpDiv, ir.OpDivF:
581+ return "div", false
582+ case ir.OpRem, ir.OpRemF:
583+ return "rem", false
584+ case ir.OpAnd:
585+ return "and", false
586+ case ir.OpOr:
587+ return "or", false
588+ case ir.OpXor:
589+ return "xor", false
590+ case ir.OpShl:
591+ return "shl", false
592+ case ir.OpShr:
593+ return "shr", false
594+ case ir.OpNegF:
595+ return "neg", false
596+ case ir.OpCEq:
597+ return "ceq" + argTypeStr, false
598+ case ir.OpCNeq:
599+ return "cne" + argTypeStr, false
600 case ir.OpCLt:
601- if argType == ir.TypeS || argType == ir.TypeD { return "clt" + argTypeStr, false }
602+ if argType == ir.TypeS || argType == ir.TypeD {
603+ return "clt" + argTypeStr, false
604+ }
605 return "cslt" + argTypeStr, false
606 case ir.OpCGt:
607- if argType == ir.TypeS || argType == ir.TypeD { return "cgt" + argTypeStr, false }
608+ if argType == ir.TypeS || argType == ir.TypeD {
609+ return "cgt" + argTypeStr, false
610+ }
611 return "csgt" + argTypeStr, false
612 case ir.OpCLe:
613- if argType == ir.TypeS || argType == ir.TypeD { return "cle" + argTypeStr, false }
614+ if argType == ir.TypeS || argType == ir.TypeD {
615+ return "cle" + argTypeStr, false
616+ }
617 return "csle" + argTypeStr, false
618 case ir.OpCGe:
619- if argType == ir.TypeS || argType == ir.TypeD { return "cge" + argTypeStr, false }
620+ if argType == ir.TypeS || argType == ir.TypeD {
621+ return "cge" + argTypeStr, false
622+ }
623 return "csge" + argTypeStr, false
624- case ir.OpJmp: return "jmp", false
625- case ir.OpJnz: return "jnz", false
626- case ir.OpRet: return "ret", false
627- case ir.OpCall: return "call", true
628- case ir.OpPhi: return "phi", false
629- case ir.OpSWToF: return "swtof", false
630- case ir.OpSLToF: return "sltof", false
631+ case ir.OpJmp:
632+ return "jmp", false
633+ case ir.OpJnz:
634+ return "jnz", false
635+ case ir.OpRet:
636+ return "ret", false
637+ case ir.OpCall:
638+ return "call", true
639+ case ir.OpPhi:
640+ return "phi", false
641+ case ir.OpSWToF:
642+ return "swtof", false
643+ case ir.OpSLToF:
644+ return "sltof", false
645 case ir.OpFToF:
646- if typ == ir.TypeD { return "exts", false }
647+ if typ == ir.TypeD {
648+ return "exts", false
649+ }
650 return "truncd", false
651 case ir.OpExtSB, ir.OpExtUB, ir.OpExtSH, ir.OpExtUH, ir.OpExtSW, ir.OpExtUW:
652 return "exts" + string(b.formatType(argType)[0]), false
653- case ir.OpFToSI: return "ftosi", false
654- case ir.OpFToUI: return "ftoui", false
655- case ir.OpCast: return "copy", false
656- default: return "unknown_op", false
657+ case ir.OpFToSI:
658+ return "ftosi", false
659+ case ir.OpFToUI:
660+ return "ftoui", false
661+ case ir.OpCast:
662+ return "copy", false
663+ default:
664+ return "unknown_op", false
665 }
666 }
+11,
-7
1@@ -30,6 +30,7 @@ const (
2 FeatContinue
3 FeatFloat
4 FeatStrictTypes
5+ FeatPromTypes
6 FeatCount
7 )
8
9@@ -48,10 +49,11 @@ const (
10 WarnPedantic
11 WarnUnreachableCode
12 WarnImplicitDecl
13- WarnType
14 WarnExtra
15 WarnFloat
16 WarnLocalAddress
17+ WarnDebugComp
18+ WarnPromTypes
19 WarnCount
20 )
21
22@@ -94,11 +96,11 @@ var archProperties = map[string]struct {
23 }
24
25 type Config struct {
26- Features map[Feature]Info
27- Warnings map[Warning]Info
28- FeatureMap map[string]Feature
29- WarningMap map[string]Warning
30- StdName string
31+ Features map[Feature]Info
32+ Warnings map[Warning]Info
33+ FeatureMap map[string]Feature
34+ WarningMap map[string]Warning
35+ StdName string
36 Target
37 LinkerArgs []string
38 LibRequests []string
39@@ -133,6 +135,7 @@ func NewConfig() *Config {
40 FeatNoDirectives: {"no-directives", false, "Disable `// [b]:` directives"},
41 FeatFloat: {"float", true, "Enable support for floating-point numbers"},
42 FeatStrictTypes: {"strict-types", false, "Disallow all incompatible type operations"},
43+ FeatPromTypes: {"prom-types", false, "Enable type promotions - promote untyped literals to compatible types"},
44 }
45
46 warnings := map[Warning]Info{
47@@ -148,10 +151,11 @@ func NewConfig() *Config {
48 WarnPedantic: {"pedantic", false, "Issue all warnings demanded by the strict standard"},
49 WarnUnreachableCode: {"unreachable-code", true, "Warn about code that will never be executed"},
50 WarnImplicitDecl: {"implicit-decl", true, "Warn about implicit function or variable declarations"},
51- WarnType: {"type", true, "Warn about type mismatches in expressions and assignments"},
52 WarnExtra: {"extra", true, "Enable extra miscellaneous warnings"},
53 WarnFloat: {"float", false, "Warn when floating-point numbers are used"},
54 WarnLocalAddress: {"local-address", true, "Warn when the address of a local variable is returned"},
55+ WarnDebugComp: {"debug-comp", false, "Debug warning for type promotions and conversions"},
56+ WarnPromTypes: {"prom-types", true, "Warn when type promotions occur"},
57 }
58
59 cfg.Features, cfg.Warnings = features, warnings
+57,
-40
1@@ -59,13 +59,17 @@ type Type int
2
3 const (
4 TypeNone Type = iota
5- TypeB // byte (8-bit)
6- TypeH // half-word (16-bit)
7+ TypeB // byte (8-bit, ambiguous signedness)
8+ TypeH // half-word (16-bit, ambiguous signedness)
9 TypeW // word (32-bit)
10 TypeL // long (64-bit)
11 TypeS // single float (32-bit)
12 TypeD // double float (64-bit)
13 TypePtr
14+ TypeSB // signed byte (8-bit)
15+ TypeUB // unsigned byte (8-bit)
16+ TypeSH // signed half-word (16-bit)
17+ TypeUH // unsigned half-word (16-bit)
18 )
19
20 type Value interface {
21@@ -78,10 +82,7 @@ type FloatConst struct{ Value float64; Typ Type }
22 type Global struct{ Name string }
23 type Temporary struct{ Name string; ID int }
24 type Label struct{ Name string }
25-type CastValue struct {
26- Value
27- TargetType string
28-}
29+type CastValue struct{ Value; TargetType string }
30
31 func (c *Const) isValue() {}
32 func (f *FloatConst) isValue() {}
33@@ -108,16 +109,9 @@ type Func struct {
34 Node *ast.Node
35 }
36
37-type Param struct {
38- Name string
39- Typ Type
40- Val Value
41-}
42+type Param struct{ Name string; Typ Type; Val Value }
43
44-type BasicBlock struct {
45- Label *Label
46- Instructions []*Instruction
47-}
48+type BasicBlock struct{ Label *Label; Instructions []*Instruction }
49
50 type Instruction struct {
51 Op Op
52@@ -147,40 +141,45 @@ type Data struct {
53 Items []DataItem
54 }
55
56-type DataItem struct {
57- Typ Type
58- Value Value
59- Count int
60-}
61+type DataItem struct{ Typ Type; Value Value; Count int }
62
63 func GetType(typ *ast.BxType, wordSize int) Type {
64- if typ == nil || typ.Kind == ast.TYPE_UNTYPED { return wordTypeFromSize(wordSize) }
65+ if typ == nil || typ.Kind == ast.TYPE_UNTYPED { return typeFromSize(wordSize, false) }
66
67 switch typ.Kind {
68- case ast.TYPE_UNTYPED_INT: return wordTypeFromSize(wordSize)
69- case ast.TYPE_UNTYPED_FLOAT: return TypeS
70+ case ast.TYPE_LITERAL_INT: return typeFromSize(wordSize, false)
71+ case ast.TYPE_LITERAL_FLOAT: return typeFromSize(wordSize, true)
72 case ast.TYPE_VOID: return TypeNone
73 case ast.TYPE_POINTER, ast.TYPE_ARRAY, ast.TYPE_STRUCT: return TypePtr
74+ case ast.TYPE_ENUM: return typeFromSize(wordSize, false)
75 case ast.TYPE_FLOAT:
76- switch typ.Name {
77- case "float", "float32": return TypeS
78- case "float64": return TypeD
79- default: return TypeS
80- }
81+ size := getTypeSizeByName(typ.Name, wordSize)
82+ return typeFromSize(int(size), true)
83 case ast.TYPE_PRIMITIVE:
84 switch typ.Name {
85- case "int", "uint", "string": return wordTypeFromSize(wordSize)
86+ case "int", "uint", "string": return typeFromSize(wordSize, false)
87 case "int64", "uint64": return TypeL
88 case "int32", "uint32": return TypeW
89- case "int16", "uint16": return TypeH
90- case "byte", "bool", "int8", "uint8": return TypeB
91- default: return wordTypeFromSize(wordSize)
92+ case "int16": return TypeSH
93+ case "uint16": return TypeUH
94+ case "int8": return TypeSB
95+ case "uint8": return TypeUB
96+ case "byte", "bool": return TypeB
97+ default: return typeFromSize(wordSize, false)
98 }
99 }
100- return wordTypeFromSize(wordSize)
101+ return typeFromSize(wordSize, false)
102 }
103
104-func wordTypeFromSize(size int) Type {
105+func typeFromSize(size int, isFloat bool) Type {
106+ if isFloat {
107+ switch size {
108+ case 4: return TypeS
109+ case 8: return TypeD
110+ default: return TypeD
111+ }
112+ }
113+
114 switch size {
115 case 8: return TypeL
116 case 4: return TypeW
117@@ -190,16 +189,36 @@ func wordTypeFromSize(size int) Type {
118 }
119 }
120
121+func getTypeSizeByName(typeName string, wordSize int) int64 {
122+ switch typeName {
123+ case "int64", "uint64", "float64": return 8
124+ case "int32", "uint32", "float32": return 4
125+ case "int16", "uint16": return 2
126+ case "int8", "uint8", "byte", "bool": return 1
127+ case "int", "uint", "string", "float": return int64(wordSize)
128+ }
129+ return 0
130+}
131+
132+type TypeSizeResolver struct{ wordSize int }
133+
134+func NewTypeSizeResolver(wordSize int) *TypeSizeResolver { return &TypeSizeResolver{wordSize: wordSize} }
135+
136+func (r *TypeSizeResolver) GetTypeSize(typeName string) int64 { return getTypeSizeByName(typeName, r.wordSize) }
137+
138+func floatTypeFromSize(size int) Type { return typeFromSize(size, true) }
139+
140 func SizeOfType(t Type, wordSize int) int64 {
141 switch t {
142- case TypeB: return 1
143- case TypeH: return 2
144+ case TypeB, TypeSB, TypeUB: return 1
145+ case TypeH, TypeSH, TypeUH: return 2
146 case TypeW: return 4
147 case TypeL: return 8
148 case TypeS: return 4
149 case TypeD: return 8
150 case TypePtr: return int64(wordSize)
151- default: return int64(wordSize)
152+ default:
153+ return int64(wordSize)
154 }
155 }
156
157@@ -223,9 +242,7 @@ func (p *Program) FindFunc(name string) *Func {
158 func (p *Program) FindFuncSymbol(name string) *ast.Node {
159 if p.GlobalSymbols != nil {
160 if node, ok := p.GlobalSymbols[name]; ok {
161- if _, isFunc := node.Data.(ast.FuncDeclNode); isFunc {
162- return node
163- }
164+ if _, isFunc := node.Data.(ast.FuncDeclNode); isFunc { return node }
165 }
166 }
167 return nil
+189,
-42
1@@ -30,7 +30,9 @@ func (l *Lexer) Next() token.Token {
2 l.skipWhitespaceAndComments()
3 startPos, startCol, startLine := l.pos, l.column, l.line
4
5- if l.isAtEnd() { return l.makeToken(token.EOF, "", startPos, startCol, startLine) }
6+ if l.isAtEnd() {
7+ return l.makeToken(token.EOF, "", startPos, startCol, startLine)
8+ }
9
10 if l.peek() == '/' && l.peekNext() == '/' {
11 if !l.cfg.IsFeatureEnabled(config.FeatNoDirectives) {
12@@ -69,22 +71,33 @@ func (l *Lexer) Next() token.Token {
13 case '!': return l.matchThen('=', token.Neq, token.Not, startPos, startCol, startLine)
14 case '^': return l.matchThen('=', token.XorEq, token.Xor, startPos, startCol, startLine)
15 case '%': return l.matchThen('=', token.RemEq, token.Rem, startPos, startCol, startLine)
16- case '+': return l.plus(startPos, startCol, startLine)
17- case '-': return l.minus(startPos, startCol, startLine)
18- case '*': return l.star(startPos, startCol, startLine)
19- case '/': return l.slash(startPos, startCol, startLine)
20- case '&': return l.ampersand(startPos, startCol, startLine)
21- case '|': return l.pipe(startPos, startCol, startLine)
22- case '<': return l.less(startPos, startCol, startLine)
23- case '>': return l.greater(startPos, startCol, startLine)
24- case '=': return l.equal(startPos, startCol, startLine)
25+ case '+':
26+ return l.plus(startPos, startCol, startLine)
27+ case '-':
28+ return l.minus(startPos, startCol, startLine)
29+ case '*':
30+ return l.star(startPos, startCol, startLine)
31+ case '/':
32+ return l.slash(startPos, startCol, startLine)
33+ case '&':
34+ return l.ampersand(startPos, startCol, startLine)
35+ case '|':
36+ return l.pipe(startPos, startCol, startLine)
37+ case '<':
38+ return l.less(startPos, startCol, startLine)
39+ case '>':
40+ return l.greater(startPos, startCol, startLine)
41+ case '=':
42+ return l.equal(startPos, startCol, startLine)
43 case '.':
44 if l.match('.') && l.match('.') {
45 return l.makeToken(token.Dots, "", startPos, startCol, startLine)
46 }
47 return l.makeToken(token.Dot, "", startPos, startCol, startLine)
48- case '"': return l.stringLiteral(startPos, startCol, startLine)
49- case '\'': return l.charLiteral(startPos, startCol, startLine)
50+ case '"':
51+ return l.stringLiteral(startPos, startCol, startLine)
52+ case '\'':
53+ return l.charLiteral(startPos, startCol, startLine)
54 }
55
56 tok := l.makeToken(token.EOF, "", startPos, startCol, startLine)
57@@ -94,17 +107,23 @@ func (l *Lexer) Next() token.Token {
58 }
59
60 func (l *Lexer) peek() rune {
61- if l.isAtEnd() { return 0 }
62+ if l.isAtEnd() {
63+ return 0
64+ }
65 return l.source[l.pos]
66 }
67
68 func (l *Lexer) peekNext() rune {
69- if l.pos+1 >= len(l.source) { return 0 }
70+ if l.pos+1 >= len(l.source) {
71+ return 0
72+ }
73 return l.source[l.pos+1]
74 }
75
76 func (l *Lexer) advance() rune {
77- if l.isAtEnd() { return 0 }
78+ if l.isAtEnd() {
79+ return 0
80+ }
81 ch := l.source[l.pos]
82 if ch == '\n' {
83 l.line++
84@@ -117,7 +136,9 @@ func (l *Lexer) advance() rune {
85 }
86
87 func (l *Lexer) match(expected rune) bool {
88- if l.isAtEnd() || l.source[l.pos] != expected { return false }
89+ if l.isAtEnd() || l.source[l.pos] != expected {
90+ return false
91+ }
92 l.advance()
93 return true
94 }
95@@ -134,14 +155,16 @@ func (l *Lexer) makeToken(tokType token.Type, value string, startPos, startCol,
96 func (l *Lexer) skipWhitespaceAndComments() {
97 for {
98 switch l.peek() {
99- case ' ', '\t', '\n', '\r': l.advance()
100+ case ' ', '\t', '\n', '\r':
101+ l.advance()
102 case '/':
103 if l.peekNext() == '*' {
104 l.blockComment()
105 } else {
106 return
107 }
108- default: return
109+ default:
110+ return
111 }
112 }
113 }
114@@ -162,7 +185,9 @@ func (l *Lexer) blockComment() {
115 }
116
117 func (l *Lexer) lineComment() {
118- for !l.isAtEnd() && l.peek() != '\n' { l.advance() }
119+ for !l.isAtEnd() && l.peek() != '\n' {
120+ l.advance()
121+ }
122 }
123
124 func (l *Lexer) lineCommentOrDirective(startPos, startCol, startLine int) (token.Token, bool) {
125@@ -170,7 +195,9 @@ func (l *Lexer) lineCommentOrDirective(startPos, startCol, startLine int) (token
126 l.advance()
127 l.advance()
128 commentStartPos := l.pos
129- for !l.isAtEnd() && l.peek() != '\n' { l.advance() }
130+ for !l.isAtEnd() && l.peek() != '\n' {
131+ l.advance()
132+ }
133 commentContent := string(l.source[commentStartPos:l.pos])
134 trimmedContent := strings.TrimSpace(commentContent)
135
136@@ -214,14 +241,18 @@ func (l *Lexer) numberLiteral(startPos, startCol, startLine int) token.Token {
137 l.advance()
138 }
139 } else {
140- for unicode.IsDigit(l.peek()) { l.advance() }
141+ for unicode.IsDigit(l.peek()) {
142+ l.advance()
143+ }
144 }
145
146 if l.peek() == '.' {
147 if unicode.IsDigit(l.peekNext()) {
148 isFloat = true
149 l.advance()
150- for unicode.IsDigit(l.peek()) { l.advance() }
151+ for unicode.IsDigit(l.peek()) {
152+ l.advance()
153+ }
154 }
155 }
156
157@@ -229,11 +260,15 @@ func (l *Lexer) numberLiteral(startPos, startCol, startLine int) token.Token {
158 if (l.peek() == 'e' || l.peek() == 'E') && !strings.HasPrefix(valueStr, "0x") && !strings.HasPrefix(valueStr, "0X") {
159 isFloat = true
160 l.advance()
161- if l.peek() == '+' || l.peek() == '-' { l.advance() }
162+ if l.peek() == '+' || l.peek() == '-' {
163+ l.advance()
164+ }
165 if !unicode.IsDigit(l.peek()) {
166 util.Error(l.makeToken(token.FloatNumber, "", startPos, startCol, startLine), "Malformed floating-point literal: exponent has no digits")
167 }
168- for unicode.IsDigit(l.peek()) { l.advance() }
169+ for unicode.IsDigit(l.peek()) {
170+ l.advance()
171+ }
172 }
173
174 valueStr = string(l.source[startPos:l.pos])
175@@ -277,7 +312,18 @@ func (l *Lexer) stringLiteral(startPos, startCol, startLine int) token.Token {
176 }
177 if (c == '\\' && l.cfg.IsFeatureEnabled(config.FeatCEsc)) || (c == '*' && l.cfg.IsFeatureEnabled(config.FeatBEsc)) {
178 l.advance()
179- sb.WriteRune(rune(l.decodeEscape(c, startPos, startCol, startLine)))
180+ val := l.decodeEscape(c, startPos, startCol, startLine)
181+ // For values 0-127, write as regular rune. For 128-255, write as raw byte
182+ if val <= 127 {
183+ sb.WriteRune(rune(val))
184+ } else {
185+ // Build the final string using a byte slice to avoid UTF-8 encoding
186+ existing := sb.String()
187+ newBuf := []byte(existing)
188+ newBuf = append(newBuf, byte(val))
189+ sb.Reset()
190+ sb.WriteString(string(newBuf))
191+ }
192 } else {
193 l.advance()
194 sb.WriteRune(c)
195@@ -303,7 +349,9 @@ func (l *Lexer) charLiteral(startPos, startCol, startLine int) token.Token {
196 }
197
198 tok := l.makeToken(token.Number, "", startPos, startCol, startLine)
199- if !l.match('\'') { util.Error(tok, "Unterminated character literal") }
200+ if !l.match('\'') {
201+ util.Error(tok, "Unterminated character literal")
202+ }
203 tok.Value = strconv.FormatInt(word, 10)
204 return tok
205 }
206@@ -314,63 +362,162 @@ func (l *Lexer) decodeEscape(escapeChar rune, startPos, startCol, startLine int)
207 return 0
208 }
209 c := l.advance()
210- escapes := map[rune]int64{'n': '\n', 't': '\t', 'e': 4, 'b': '\b', 'r': '\r', '0': 0, '(': '{', ')': '}', '\\': '\\', '\'': '\'', '"': '"', '*': '*'}
211- if val, ok := escapes[c]; ok { return val }
212+
213+ // Handle hex escape sequences (\x followed by exactly 2 hex digits)
214+ if c == 'x' {
215+ return l.parseHexEscape(2, escapeChar, startPos, startCol, startLine)
216+ }
217+
218+ // Handle unicode escape sequences (\u followed by exactly 4 hex digits)
219+ if c == 'u' {
220+ return l.parseHexEscape(4, escapeChar, startPos, startCol, startLine)
221+ }
222+
223+ // Handle unicode escape sequences (\U followed by exactly 8 hex digits)
224+ if c == 'U' {
225+ return l.parseHexEscape(8, escapeChar, startPos, startCol, startLine)
226+ }
227+
228+ // Handle octal escape sequences (\000-\377) - Go-style: exactly 3 digits required
229+ if c >= '0' && c <= '7' {
230+ val := int64(c - '0')
231+ digitsRead := 1
232+
233+ // Read exactly 2 more digits (for 3 total - Go behavior)
234+ for i := 0; i < 2; i++ {
235+ if l.isAtEnd() {
236+ util.Warn(l.cfg, config.WarnUnrecognizedEscape, l.makeToken(token.String, "", startPos, startCol, startLine),
237+ "Octal escape sequence '%c%c...' requires exactly 3 digits, got %d (use \\%03o for Go-style)", escapeChar, c, digitsRead, val)
238+ return val
239+ }
240+ next := l.peek()
241+ if next >= '0' && next <= '7' {
242+ val = val*8 + int64(next-'0')
243+ l.advance()
244+ digitsRead++
245+ } else {
246+ util.Warn(l.cfg, config.WarnUnrecognizedEscape, l.makeToken(token.String, "", startPos, startCol, startLine),
247+ "Octal escape sequence '%c%c...' requires exactly 3 digits, got %d (use \\%03o for Go-style)", escapeChar, c, digitsRead, val)
248+ return val
249+ }
250+ }
251+ return val
252+ }
253+
254+ escapes := map[rune]int64{
255+ 'n': '\n', 't': '\t', 'e': 4, 'b': '\b', 'r': '\r',
256+ '(': '{', ')': '}', '\\': '\\', '\'': '\'', '"': '"', '*': '*',
257+ 'a': '\a', 'f': '\f', 'v': '\v', '0': 0,
258+ }
259+ if val, ok := escapes[c]; ok {
260+ return val
261+ }
262 util.Warn(l.cfg, config.WarnUnrecognizedEscape, l.makeToken(token.String, "", startPos, startCol, startLine), "Unrecognized escape sequence '%c%c'", escapeChar, c)
263 return int64(c)
264 }
265
266+func (l *Lexer) parseHexEscape(numDigits int, escapeChar rune, startPos, startCol, startLine int) int64 {
267+ var val int64
268+ for i := 0; i < numDigits; i++ {
269+ if l.isAtEnd() {
270+ util.Error(l.makeToken(token.String, "", startPos, startCol, startLine), "Incomplete hex escape sequence '%c%c' - expected %d hex digits", escapeChar, 'x', numDigits)
271+ return 0
272+ }
273+ c := l.peek()
274+ var digit int64
275+ switch {
276+ case c >= '0' && c <= '9': digit = int64(c - '0')
277+ case c >= 'a' && c <= 'f': digit = int64(c - 'a' + 10)
278+ case c >= 'A' && c <= 'F': digit = int64(c - 'A' + 10)
279+ default:
280+ util.Error(l.makeToken(token.String, "", startPos, startCol, startLine), "Invalid hex digit '%c' in escape sequence", c)
281+ return 0
282+ }
283+ val = val*16 + digit
284+ l.advance()
285+ }
286+ return val
287+}
288+
289 func (l *Lexer) matchThen(expected rune, thenType, elseType token.Type, sPos, sCol, sLine int) token.Token {
290- if l.match(expected) { return l.makeToken(thenType, "", sPos, sCol, sLine) }
291+ if l.match(expected) {
292+ return l.makeToken(thenType, "", sPos, sCol, sLine)
293+ }
294 return l.makeToken(elseType, "", sPos, sCol, sLine)
295 }
296
297 func (l *Lexer) plus(sPos, sCol, sLine int) token.Token {
298- if l.match('+') { return l.makeToken(token.Inc, "", sPos, sCol, sLine) }
299- if l.cfg.IsFeatureEnabled(config.FeatCOps) && l.match('=') { return l.makeToken(token.PlusEq, "", sPos, sCol, sLine) }
300+ if l.match('+') {
301+ return l.makeToken(token.Inc, "", sPos, sCol, sLine)
302+ }
303+ if l.cfg.IsFeatureEnabled(config.FeatCOps) && l.match('=') {
304+ return l.makeToken(token.PlusEq, "", sPos, sCol, sLine)
305+ }
306 return l.makeToken(token.Plus, "", sPos, sCol, sLine)
307 }
308
309 func (l *Lexer) minus(sPos, sCol, sLine int) token.Token {
310- if l.match('-') { return l.makeToken(token.Dec, "", sPos, sCol, sLine) }
311- if l.cfg.IsFeatureEnabled(config.FeatCOps) && l.match('=') { return l.makeToken(token.MinusEq, "", sPos, sCol, sLine) }
312+ if l.match('-') {
313+ return l.makeToken(token.Dec, "", sPos, sCol, sLine)
314+ }
315+ if l.cfg.IsFeatureEnabled(config.FeatCOps) && l.match('=') {
316+ return l.makeToken(token.MinusEq, "", sPos, sCol, sLine)
317+ }
318 return l.makeToken(token.Minus, "", sPos, sCol, sLine)
319 }
320
321 func (l *Lexer) star(sPos, sCol, sLine int) token.Token {
322- if l.cfg.IsFeatureEnabled(config.FeatCOps) && l.match('=') { return l.makeToken(token.StarEq, "", sPos, sCol, sLine) }
323+ if l.cfg.IsFeatureEnabled(config.FeatCOps) && l.match('=') {
324+ return l.makeToken(token.StarEq, "", sPos, sCol, sLine)
325+ }
326 return l.makeToken(token.Star, "", sPos, sCol, sLine)
327 }
328
329 func (l *Lexer) slash(sPos, sCol, sLine int) token.Token {
330- if l.cfg.IsFeatureEnabled(config.FeatCOps) && l.match('=') { return l.makeToken(token.SlashEq, "", sPos, sCol, sLine) }
331+ if l.cfg.IsFeatureEnabled(config.FeatCOps) && l.match('=') {
332+ return l.makeToken(token.SlashEq, "", sPos, sCol, sLine)
333+ }
334 return l.makeToken(token.Slash, "", sPos, sCol, sLine)
335 }
336
337 func (l *Lexer) ampersand(sPos, sCol, sLine int) token.Token {
338- if l.match('&') { return l.makeToken(token.AndAnd, "", sPos, sCol, sLine) }
339- if l.cfg.IsFeatureEnabled(config.FeatCOps) && l.match('=') { return l.makeToken(token.AndEq, "", sPos, sCol, sLine) }
340+ if l.match('&') {
341+ return l.makeToken(token.AndAnd, "", sPos, sCol, sLine)
342+ }
343+ if l.cfg.IsFeatureEnabled(config.FeatCOps) && l.match('=') {
344+ return l.makeToken(token.AndEq, "", sPos, sCol, sLine)
345+ }
346 return l.makeToken(token.And, "", sPos, sCol, sLine)
347 }
348
349 func (l *Lexer) pipe(sPos, sCol, sLine int) token.Token {
350- if l.match('|') { return l.makeToken(token.OrOr, "", sPos, sCol, sLine) }
351- if l.cfg.IsFeatureEnabled(config.FeatCOps) && l.match('=') { return l.makeToken(token.OrEq, "", sPos, sCol, sLine) }
352+ if l.match('|') {
353+ return l.makeToken(token.OrOr, "", sPos, sCol, sLine)
354+ }
355+ if l.cfg.IsFeatureEnabled(config.FeatCOps) && l.match('=') {
356+ return l.makeToken(token.OrEq, "", sPos, sCol, sLine)
357+ }
358 return l.makeToken(token.Or, "", sPos, sCol, sLine)
359 }
360
361 func (l *Lexer) less(sPos, sCol, sLine int) token.Token {
362- if l.match('<') { return l.matchThen('=', token.ShlEq, token.Shl, sPos, sCol, sLine) }
363+ if l.match('<') {
364+ return l.matchThen('=', token.ShlEq, token.Shl, sPos, sCol, sLine)
365+ }
366 return l.matchThen('=', token.Lte, token.Lt, sPos, sCol, sLine)
367 }
368
369 func (l *Lexer) greater(sPos, sCol, sLine int) token.Token {
370- if l.match('>') { return l.matchThen('=', token.ShrEq, token.Shr, sPos, sCol, sLine) }
371+ if l.match('>') {
372+ return l.matchThen('=', token.ShrEq, token.Shr, sPos, sCol, sLine)
373+ }
374 return l.matchThen('=', token.Gte, token.Gt, sPos, sCol, sLine)
375 }
376
377 func (l *Lexer) equal(sPos, sCol, sLine int) token.Token {
378- if l.match('=') { return l.makeToken(token.EqEq, "", sPos, sCol, sLine) }
379+ if l.match('=') {
380+ return l.makeToken(token.EqEq, "", sPos, sCol, sLine)
381+ }
382 if l.cfg.IsFeatureEnabled(config.FeatBOps) {
383 switch {
384 case l.match('+'): return l.makeToken(token.EqPlus, "", sPos, sCol, sLine)
+413,
-85
1@@ -28,7 +28,9 @@ func NewParser(tokens []token.Token, cfg *config.Config) *Parser {
2 isTypedPass: cfg.IsFeatureEnabled(config.FeatTyped),
3 typeNames: make(map[string]bool),
4 }
5- if len(tokens) > 0 { p.current = p.tokens[0] }
6+ if len(tokens) > 0 {
7+ p.current = p.tokens[0]
8+ }
9
10 if p.isTypedPass {
11 for keyword, tokType := range token.KeywordMap {
12@@ -44,12 +46,16 @@ func (p *Parser) advance() {
13 if p.pos < len(p.tokens) {
14 p.previous = p.current
15 p.pos++
16- if p.pos < len(p.tokens) { p.current = p.tokens[p.pos] }
17+ if p.pos < len(p.tokens) {
18+ p.current = p.tokens[p.pos]
19+ }
20 }
21 }
22
23 func (p *Parser) peek() token.Token {
24- if p.pos+1 < len(p.tokens) { return p.tokens[p.pos+1] }
25+ if p.pos+1 < len(p.tokens) {
26+ return p.tokens[p.pos+1]
27+ }
28 return p.tokens[len(p.tokens)-1]
29 }
30
31@@ -82,10 +88,8 @@ func (p *Parser) isTypeName(name string) bool {
32 func isLValue(node *ast.Node) bool {
33 if node == nil { return false }
34 switch node.Type {
35- case ast.Ident, ast.Indirection, ast.Subscript, ast.MemberAccess:
36- return true
37- default:
38- return false
39+ case ast.Ident, ast.Indirection, ast.Subscript, ast.MemberAccess: return true
40+ default: return false
41 }
42 }
43
44@@ -93,8 +97,7 @@ func (p *Parser) Parse() *ast.Node {
45 var stmts []*ast.Node
46 tok := p.current
47 for !p.check(token.EOF) {
48- for p.match(token.Semi) {
49- }
50+ for p.match(token.Semi) {}
51 if p.check(token.EOF) { break }
52
53 stmt := p.parseTopLevel()
54@@ -126,12 +129,8 @@ func (p *Parser) parseTopLevel() *ast.Node {
55 }
56 stmt = ast.NewDirective(currentTok, directiveVal)
57 p.advance()
58- case token.TypeKeyword:
59- p.advance()
60- stmt = p.parseTypeDecl()
61- case token.Extrn:
62- p.advance()
63- stmt = p.parseUntypedDeclarationList(token.Extrn, currentTok)
64+ case token.TypeKeyword: p.advance(); stmt = p.parseTypeDecl()
65+ case token.Extrn: p.advance(); stmt = p.parseUntypedDeclarationList(token.Extrn, currentTok)
66 case token.Auto:
67 if p.isBxDeclarationAhead() {
68 stmt = p.parseDeclaration(true)
69@@ -149,6 +148,21 @@ func (p *Parser) parseTopLevel() *ast.Node {
70 } else if peekTok.Type == token.Asm {
71 p.advance()
72 stmt = p.parseAsmFuncDef(identTok)
73+ } else if peekTok.Type == token.Extrn {
74+ // Handle typed external declaration: type_name extrn function_name;
75+ if p.isTypedPass && p.isTypeName(identTok.Value) {
76+ returnType := p.typeFromName(identTok.Value)
77+ if returnType != nil {
78+ p.advance() // consume type name
79+ p.advance() // consume 'extrn'
80+ stmt = p.parseTypedExtrnDecl(identTok, returnType)
81+ } else {
82+ // Fallback to regular parsing if type not found
83+ stmt = p.parseUntypedGlobalDefinition(identTok)
84+ }
85+ } else {
86+ stmt = p.parseUntypedGlobalDefinition(identTok)
87+ }
88 } else if p.isTypedPass && p.isTypeName(identTok.Value) && peekTok.Type != token.Define {
89 stmt = p.parseTypedVarOrFuncDecl(true)
90 } else if p.isBxDeclarationAhead() {
91@@ -177,16 +191,24 @@ func (p *Parser) isBxDeclarationAhead() bool {
92 defer func() { p.pos, p.current = originalPos, originalCurrent }()
93
94 hasAuto := p.match(token.Auto)
95- if !p.check(token.Ident) { return false }
96+ if !p.check(token.Ident) {
97+ return false
98+ }
99 p.advance()
100
101 for p.match(token.Comma) {
102- if !p.check(token.Ident) { return false }
103+ if !p.check(token.Ident) {
104+ return false
105+ }
106 p.advance()
107 }
108
109- if p.check(token.Define) { return true }
110- if p.check(token.Eq) { return hasAuto }
111+ if p.check(token.Define) {
112+ return true
113+ }
114+ if p.check(token.Eq) {
115+ return hasAuto
116+ }
117 return false
118 }
119
120@@ -194,6 +216,39 @@ func (p *Parser) isBuiltinType(tok token.Token) bool {
121 return tok.Type >= token.Void && tok.Type <= token.Any
122 }
123
124+// isPointerCastAhead checks if the current position looks like a pointer cast: (type*)
125+// This allows complex pointer casts while disallowing simple scalar C-style casts
126+func (p *Parser) isPointerCastAhead() bool {
127+ if !p.isTypedPass { return false }
128+
129+ originalPos, originalCurrent := p.pos, p.current
130+ defer func() { p.pos, p.current = originalPos, originalCurrent }()
131+
132+ if p.match(token.Const) {}
133+
134+ if p.isBuiltinType(p.current) {
135+ p.advance()
136+ } else if p.check(token.Ident) && p.isTypeName(p.current.Value) {
137+ p.advance()
138+ } else {
139+ return false
140+ }
141+
142+ hasPointer := false
143+ if p.match(token.Star) {
144+ hasPointer = true
145+ for p.match(token.Star) {}
146+ }
147+
148+ if p.match(token.LBracket) {
149+ hasPointer = true
150+ for !p.check(token.RBracket) && !p.check(token.EOF) { p.advance() }
151+ if p.check(token.RBracket) { p.advance() }
152+ }
153+
154+ return hasPointer && p.check(token.RParen)
155+}
156+
157 func (p *Parser) parseStmt() *ast.Node {
158 tok := p.current
159
160@@ -228,7 +283,9 @@ func (p *Parser) parseStmt() *ast.Node {
161 p.expect(token.RParen, "Expected ')' after if condition")
162 thenBody := p.parseStmt()
163 var elseBody *ast.Node
164- if p.match(token.Else) { elseBody = p.parseStmt() }
165+ if p.match(token.Else) {
166+ elseBody = p.parseStmt()
167+ }
168 return ast.NewIf(tok, cond, thenBody, elseBody)
169 case p.match(token.While):
170 p.expect(token.LParen, "Expected '(' after 'while'")
171@@ -239,13 +296,17 @@ func (p *Parser) parseStmt() *ast.Node {
172 case p.match(token.Switch):
173 hasParen := p.match(token.LParen)
174 expr := p.parseExpr()
175- if hasParen { p.expect(token.RParen, "Expected ')' after switch expression") }
176+ if hasParen {
177+ p.expect(token.RParen, "Expected ')' after switch expression")
178+ }
179 body := p.parseStmt()
180 return ast.NewSwitch(tok, expr, body)
181 case p.check(token.LBrace):
182 return p.parseBlockStmt()
183 case p.check(token.Auto):
184- if p.isBxDeclarationAhead() { return p.parseDeclaration(true) }
185+ if p.isBxDeclarationAhead() {
186+ return p.parseDeclaration(true)
187+ }
188 p.advance()
189 return p.parseUntypedDeclarationList(token.Auto, p.previous)
190 case p.match(token.Extrn):
191@@ -254,7 +315,9 @@ func (p *Parser) parseStmt() *ast.Node {
192 var values []*ast.Node
193 for {
194 values = append(values, p.parseExpr())
195- if !p.match(token.Comma) { break }
196+ if !p.match(token.Comma) {
197+ break
198+ }
199 }
200 p.expect(token.Colon, "Expected ':' after case value")
201 body := p.parseStmt()
202@@ -278,7 +341,9 @@ func (p *Parser) parseStmt() *ast.Node {
203 }
204 if !isKeyword {
205 util.Error(p.current, "Expected label name after 'goto'")
206- for !p.check(token.Semi) && !p.check(token.EOF) { p.advance() }
207+ for !p.check(token.Semi) && !p.check(token.EOF) {
208+ p.advance()
209+ }
210 } else {
211 if labelName == "continue" {
212 util.Warn(p.cfg, config.WarnExtra, p.current, "'goto continue' is a workaround for a limitation of -std=B; please avoid this construct")
213@@ -293,7 +358,9 @@ func (p *Parser) parseStmt() *ast.Node {
214 var expr *ast.Node
215 if !p.check(token.Semi) {
216 p.expect(token.LParen, "Expected '(' after 'return' with value")
217- if !p.check(token.RParen) { expr = p.parseExpr() }
218+ if !p.check(token.RParen) {
219+ expr = p.parseExpr()
220+ }
221 p.expect(token.RParen, "Expected ')' after return value")
222 }
223 p.expect(token.Semi, "Expected ';' after return statement")
224@@ -307,24 +374,31 @@ func (p *Parser) parseStmt() *ast.Node {
225 }
226 p.expect(token.Semi, "Expected ';' after 'continue'")
227 return ast.NewContinue(tok)
228- case p.match(token.Semi):
229- return ast.NewBlock(tok, nil, true)
230+ case p.match(token.Semi): return ast.NewBlock(tok, nil, true)
231 default:
232 if p.check(token.Ident) {
233 isShortDecl := false
234 originalPos, originalCurrent := p.pos, p.current
235 p.advance()
236 for p.match(token.Comma) {
237- if !p.check(token.Ident) { break }
238+ if !p.check(token.Ident) {
239+ break
240+ }
241 p.advance()
242 }
243- if p.check(token.Define) { isShortDecl = true }
244+ if p.check(token.Define) {
245+ isShortDecl = true
246+ }
247 p.pos, p.current = originalPos, originalCurrent
248- if isShortDecl { return p.parseDeclaration(false) }
249+ if isShortDecl {
250+ return p.parseDeclaration(false)
251+ }
252 }
253
254 expr := p.parseExpr()
255- if expr != nil { p.expect(token.Semi, "Expected ';' after expression statement") }
256+ if expr != nil {
257+ p.expect(token.Semi, "Expected ';' after expression statement")
258+ }
259 return expr
260 }
261 }
262@@ -358,7 +432,9 @@ func (p *Parser) parseDeclaration(hasAuto bool) *ast.Node {
263 for {
264 p.expect(token.Ident, "Expected identifier in declaration")
265 names = append(names, ast.NewIdent(p.previous, p.previous.Value))
266- if !p.match(token.Comma) { break }
267+ if !p.match(token.Comma) {
268+ break
269+ }
270 }
271
272 var op token.Type
273@@ -374,7 +450,9 @@ func (p *Parser) parseDeclaration(hasAuto bool) *ast.Node {
274 if op != 0 {
275 for {
276 inits = append(inits, p.parseAssignmentExpr())
277- if !p.match(token.Comma) { break }
278+ if !p.match(token.Comma) {
279+ break
280+ }
281 }
282 if len(names) != len(inits) {
283 util.Error(declTok, "Mismatched number of variables and initializers (%d vs %d)", len(names), len(inits))
284@@ -386,14 +464,18 @@ func (p *Parser) parseDeclaration(hasAuto bool) *ast.Node {
285 var decls []*ast.Node
286 for i, nameNode := range names {
287 var initList []*ast.Node
288- if i < len(inits) { initList = append(initList, inits[i]) }
289+ if i < len(inits) {
290+ initList = append(initList, inits[i])
291+ }
292 name := nameNode.Data.(ast.IdentNode).Name
293 decls = append(decls, ast.NewVarDecl(nameNode.Tok, name, ast.TypeUntyped, initList, nil, false, false, isDefine))
294 }
295
296 p.expect(token.Semi, "Expected ';' after declaration")
297
298- if len(decls) == 1 { return decls[0] }
299+ if len(decls) == 1 {
300+ return decls[0]
301+ }
302 return ast.NewMultiVarDecl(declTok, decls)
303 }
304
305@@ -403,10 +485,12 @@ func (p *Parser) parseUntypedDeclarationList(declType token.Type, declTok token.
306 for {
307 p.expect(token.Ident, "Expected identifier in 'extrn' list")
308 names = append(names, ast.NewIdent(p.previous, p.previous.Value))
309- if !p.match(token.Comma) { break }
310+ if !p.match(token.Comma) {
311+ break
312+ }
313 }
314 p.expect(token.Semi, "Expected ';' after 'extrn' declaration")
315- return ast.NewExtrnDecl(declTok, names)
316+ return ast.NewExtrnDecl(declTok, names, nil)
317 }
318
319 var decls []*ast.Node
320@@ -437,7 +521,9 @@ func (p *Parser) parseUntypedDeclarationList(declType token.Type, declTok token.
321 util.Error(p.previous, "Classic B 'auto' vectors use 'auto name size', not 'auto name[size]'")
322 }
323 isVector, isBracketed = true, true
324- if !p.check(token.RBracket) { sizeExpr = p.parseExpr() }
325+ if !p.check(token.RBracket) {
326+ sizeExpr = p.parseExpr()
327+ }
328 p.expect(token.RBracket, "Expected ']' after array size")
329 } else if p.check(token.Number) {
330 isVector = true
331@@ -449,17 +535,23 @@ func (p *Parser) parseUntypedDeclarationList(declType token.Type, declTok token.
332 }
333
334 decls = append(decls, ast.NewVarDecl(itemToken, name, nil, nil, sizeExpr, isVector, isBracketed, false))
335- if !p.match(token.Comma) { break }
336+ if !p.match(token.Comma) {
337+ break
338+ }
339 }
340 p.expect(token.Semi, "Expected ';' after declaration list")
341
342- if len(decls) == 1 { return decls[0] }
343+ if len(decls) == 1 {
344+ return decls[0]
345+ }
346 return ast.NewMultiVarDecl(declTok, decls)
347 }
348
349 func (p *Parser) parseUntypedGlobalDefinition(nameToken token.Token) *ast.Node {
350 name := nameToken.Value
351- if p.isTypeName(name) { util.Error(nameToken, "Variable name '%s' shadows a type", name) }
352+ if p.isTypeName(name) {
353+ util.Error(nameToken, "Variable name '%s' shadows a type", name)
354+ }
355 p.advance()
356
357 var sizeExpr *ast.Node
358@@ -467,19 +559,30 @@ func (p *Parser) parseUntypedGlobalDefinition(nameToken token.Token) *ast.Node {
359
360 if p.match(token.LBracket) {
361 isVector, isBracketed = true, true
362- if !p.check(token.RBracket) { sizeExpr = p.parseExpr() }
363+ if !p.check(token.RBracket) {
364+ sizeExpr = p.parseExpr()
365+ }
366 p.expect(token.RBracket, "Expected ']' for vector definition")
367 }
368
369 var initList []*ast.Node
370 if !p.check(token.Semi) {
371+ // Check if this is an attempt to assign without declaration
372+ if p.check(token.Eq) {
373+ util.Error(nameToken, "Assignment without declaration is not allowed at global scope. Use ':=' or make it a typed declaration and initialization")
374+ return nil
375+ }
376 initList = append(initList, p.parseUnaryExpr())
377 if isBracketed || p.match(token.Comma) || (!p.check(token.Semi) && !p.check(token.EOF)) {
378 isVector = true
379- if p.previous.Type != token.Comma { p.match(token.Comma) }
380+ if p.previous.Type != token.Comma {
381+ p.match(token.Comma)
382+ }
383 for !p.check(token.Semi) && !p.check(token.EOF) {
384 initList = append(initList, p.parseUnaryExpr())
385- if p.check(token.Semi) || p.check(token.EOF) { break }
386+ if p.check(token.Semi) || p.check(token.EOF) {
387+ break
388+ }
389 p.match(token.Comma)
390 }
391 }
392@@ -495,7 +598,9 @@ func (p *Parser) parseUntypedGlobalDefinition(nameToken token.Token) *ast.Node {
393
394 func (p *Parser) parseFuncDecl(returnType *ast.BxType, nameToken token.Token) *ast.Node {
395 name := nameToken.Value
396- if p.isTypeName(name) { util.Error(nameToken, "Function name '%s' shadows a type", name) }
397+ if p.isTypeName(name) {
398+ util.Error(nameToken, "Function name '%s' shadows a type", name)
399+ }
400 p.expect(token.LParen, "Expected '(' after function name")
401
402 var params []*ast.Node
403@@ -543,7 +648,9 @@ func (p *Parser) parseFuncDecl(returnType *ast.BxType, nameToken token.Token) *a
404
405 if returnType == nil {
406 returnType = ast.TypeUntyped
407- if isTyped { returnType = ast.TypeInt }
408+ if isTyped {
409+ returnType = ast.TypeInt
410+ }
411 }
412
413 return ast.NewFuncDecl(nameToken, name, params, body, hasVarargs, isTyped, returnType)
414@@ -551,7 +658,9 @@ func (p *Parser) parseFuncDecl(returnType *ast.BxType, nameToken token.Token) *a
415
416 func (p *Parser) parseAsmFuncDef(nameToken token.Token) *ast.Node {
417 name := nameToken.Value
418- if p.isTypeName(name) { util.Error(nameToken, "Function name '%s' shadows a type", name) }
419+ if p.isTypeName(name) {
420+ util.Error(nameToken, "Function name '%s' shadows a type", name)
421+ }
422
423 p.expect(token.Asm, "Expected '__asm__' keyword")
424 asmTok := p.previous
425@@ -579,7 +688,9 @@ func (p *Parser) parseAsmFuncDef(nameToken token.Token) *ast.Node {
426 func (p *Parser) parseTypeDecl() *ast.Node {
427 typeTok := p.previous
428
429- if p.match(token.Enum) { return p.parseEnumDef(typeTok) }
430+ if p.match(token.Enum) {
431+ return p.parseEnumDef(typeTok)
432+ }
433
434 if p.match(token.Struct) {
435 underlyingType := p.parseStructDef()
436@@ -602,9 +713,27 @@ func (p *Parser) parseTypeDecl() *ast.Node {
437 return ast.NewTypeDecl(typeTok, name, underlyingType)
438 }
439
440- util.Error(typeTok, "Expected 'struct' or 'enum' after 'type'")
441- p.advance()
442- return nil
443+ // Handle type alias: type <underlying_type> <new_name>;
444+ underlyingType := p.parseType()
445+ if underlyingType == nil {
446+ util.Error(p.current, "Expected a type for type alias after 'type'")
447+ for !p.check(token.Semi) && !p.check(token.EOF) {
448+ p.advance()
449+ }
450+ return nil
451+ }
452+
453+ p.expect(token.Ident, "Expected new type name for alias")
454+ nameToken := p.previous
455+ name := nameToken.Value
456+
457+ if p.isTypeName(name) {
458+ util.Error(nameToken, "Redefinition of type '%s'", name)
459+ }
460+ p.typeNames[name] = true
461+
462+ p.expect(token.Semi, "Expected ';' after type alias declaration")
463+ return ast.NewTypeDecl(typeTok, name, underlyingType)
464 }
465
466 func (p *Parser) parseEnumDef(typeTok token.Token) *ast.Node {
467@@ -640,7 +769,9 @@ func (p *Parser) parseEnumDef(typeTok token.Token) *ast.Node {
468
469 currentValue++
470
471- if !p.match(token.Comma) { break }
472+ if !p.match(token.Comma) {
473+ break
474+ }
475 }
476
477 p.expect(token.RBrace, "Expected '}' to close enum definition")
478@@ -658,10 +789,17 @@ func (p *Parser) parseTypedVarOrFuncDecl(isTopLevel bool) *ast.Node {
479 return p.parseTypedVarDeclBody(startTok, declType, p.previous)
480 }
481
482+ // Check for typed external declaration: type extrn function_name;
483+ if p.match(token.Extrn) {
484+ return p.parseTypedExtrnDecl(startTok, declType)
485+ }
486+
487 p.expect(token.Ident, "Expected identifier after type")
488 nameToken := p.previous
489
490- if p.check(token.LParen) { return p.parseFuncDecl(declType, nameToken) }
491+ if p.check(token.LParen) {
492+ return p.parseFuncDecl(declType, nameToken)
493+ }
494
495 return p.parseTypedVarDeclBody(startTok, declType, nameToken)
496 }
497@@ -678,7 +816,9 @@ func (p *Parser) parseTypedVarDeclBody(startTok token.Token, declType *ast.BxTyp
498
499 if p.match(token.LBracket) {
500 isArr, isBracketed = true, true
501- if !p.check(token.RBracket) { sizeExpr = p.parseExpr() }
502+ if !p.check(token.RBracket) {
503+ sizeExpr = p.parseExpr()
504+ }
505 p.expect(token.RBracket, "Expected ']' after array size")
506 finalType = &ast.BxType{Kind: ast.TYPE_ARRAY, Base: declType, ArraySize: sizeExpr, IsConst: declType.IsConst}
507 }
508@@ -692,7 +832,9 @@ func (p *Parser) parseTypedVarDeclBody(startTok token.Token, declType *ast.BxTyp
509
510 decls = append(decls, ast.NewVarDecl(currentNameToken, name, finalType, initList, sizeExpr, isArr, isBracketed, false))
511
512- if !p.match(token.Comma) { break }
513+ if !p.match(token.Comma) {
514+ break
515+ }
516
517 p.expect(token.Ident, "Expected identifier after comma in declaration list")
518 currentNameToken = p.previous
519@@ -700,12 +842,16 @@ func (p *Parser) parseTypedVarDeclBody(startTok token.Token, declType *ast.BxTyp
520
521 p.expect(token.Semi, "Expected ';' after typed variable declaration")
522
523- if len(decls) == 1 { return decls[0] }
524+ if len(decls) == 1 {
525+ return decls[0]
526+ }
527 return ast.NewMultiVarDecl(startTok, decls)
528 }
529
530 func (p *Parser) parseType() *ast.BxType {
531- if !p.isTypedPass { return nil }
532+ if !p.isTypedPass {
533+ return nil
534+ }
535
536 isConst := p.match(token.Const)
537 var baseType *ast.BxType
538@@ -789,7 +935,9 @@ func (p *Parser) parseStructDef() *ast.BxType {
539
540 if p.check(token.Ident) {
541 structType.StructTag = p.current.Value
542- if p.isTypedPass { p.typeNames[structType.StructTag] = true }
543+ if p.isTypedPass {
544+ p.typeNames[structType.StructTag] = true
545+ }
546 p.advance()
547 }
548
549@@ -821,7 +969,9 @@ func (p *Parser) parseStructDef() *ast.BxType {
550 }
551
552 p.expect(token.RBrace, "Expected '}' to close struct definition")
553- if structType.StructTag != "" { structType.Name = structType.StructTag }
554+ if structType.StructTag != "" {
555+ structType.Name = structType.StructTag
556+ }
557 return structType
558 }
559
560@@ -829,14 +979,24 @@ func (p *Parser) isTypedParameterList() bool {
561 originalPos, originalCurrent := p.pos, p.current
562 defer func() { p.pos, p.current = originalPos, originalCurrent }()
563
564- if p.check(token.RParen) { return false }
565- if p.check(token.Void) && p.peek().Type == token.RParen { return true }
566- if p.isBuiltinType(p.current) || p.isTypeName(p.current.Value) { return true }
567+ if p.check(token.RParen) {
568+ return false
569+ }
570+ if p.check(token.Void) && p.peek().Type == token.RParen {
571+ return true
572+ }
573+ if p.isBuiltinType(p.current) || p.isTypeName(p.current.Value) {
574+ return true
575+ }
576
577 for {
578- if !p.check(token.Ident) { return false }
579+ if !p.check(token.Ident) {
580+ return false
581+ }
582 p.advance()
583- if !p.match(token.Comma) { break }
584+ if !p.match(token.Comma) {
585+ break
586+ }
587 }
588 return p.isBuiltinType(p.current) || p.isTypeName(p.current.Value) || p.check(token.LBracket) || p.check(token.Star)
589 }
590@@ -852,7 +1012,9 @@ func (p *Parser) parseUntypedParameters() ([]*ast.Node, bool) {
591 }
592 p.expect(token.Ident, "Expected parameter name or '...'")
593 params = append(params, ast.NewIdent(p.previous, p.previous.Value))
594- if !p.match(token.Comma) { break }
595+ if !p.match(token.Comma) {
596+ break
597+ }
598 }
599 }
600 return params, hasVarargs
601@@ -861,14 +1023,18 @@ func (p *Parser) parseUntypedParameters() ([]*ast.Node, bool) {
602 func (p *Parser) parseTypedParameters() ([]*ast.Node, bool) {
603 var params []*ast.Node
604 var hasVarargs bool
605- if p.check(token.RParen) { return params, false }
606+ if p.check(token.RParen) {
607+ return params, false
608+ }
609 if p.check(token.Void) && p.peek().Type == token.RParen {
610 p.advance()
611 return params, false
612 }
613
614 for {
615- if p.check(token.RParen) { break }
616+ if p.check(token.RParen) {
617+ break
618+ }
619 if p.match(token.Dots) {
620 hasVarargs = true
621 break
622@@ -900,7 +1066,9 @@ func (p *Parser) parseTypedParameters() ([]*ast.Node, bool) {
623 }
624 }
625
626- if !p.match(token.Comma) { break }
627+ if !p.match(token.Comma) {
628+ break
629+ }
630 }
631 return params, hasVarargs
632 }
633@@ -917,16 +1085,73 @@ func getBinaryOpPrecedence(op token.Type) int {
634 case token.Shl, token.Shr: return 11
635 case token.Plus, token.Minus: return 12
636 case token.Star, token.Slash, token.Rem: return 13
637- default: return -1
638+ default:
639+ return -1
640 }
641 }
642
643 func (p *Parser) parseExpr() *ast.Node { return p.parseAssignmentExpr() }
644
645 func (p *Parser) parseAssignmentExpr() *ast.Node {
646+ // Try to detect multi-assignment pattern: lhs1, lhs2, ... = rhs1, rhs2, ...
647+ startPos := p.pos
648+ startCurrent := p.current
649+
650+ // Parse first expression
651 left := p.parseTernaryExpr()
652+
653+ // Check if this could be a multi-assignment (comma followed by more expressions then equals)
654+ if p.check(token.Comma) {
655+ var lhsList []*ast.Node
656+ lhsList = append(lhsList, left)
657+
658+ // Parse comma-separated lvalues
659+ for p.match(token.Comma) {
660+ expr := p.parseTernaryExpr()
661+ lhsList = append(lhsList, expr)
662+ }
663+
664+ // Check if we have an assignment operator
665+ if op := p.current.Type; op >= token.Eq && op <= token.EqShr {
666+ // Validate all lhs expressions are lvalues
667+ for _, lhs := range lhsList {
668+ if !isLValue(lhs) {
669+ util.Error(p.current, "Invalid target for assignment")
670+ }
671+ }
672+
673+ tok := p.current
674+ p.advance()
675+
676+ // Parse comma-separated rvalues
677+ var rhsList []*ast.Node
678+ for {
679+ rhs := p.parseAssignmentExpr()
680+ rhsList = append(rhsList, rhs)
681+ if !p.match(token.Comma) {
682+ break
683+ }
684+ }
685+
686+ // Check that number of lhs and rhs match
687+ if len(lhsList) != len(rhsList) {
688+ util.Error(tok, "Mismatched number of variables and values in assignment (%d vs %d)", len(lhsList), len(rhsList))
689+ }
690+
691+ return ast.NewMultiAssign(tok, op, lhsList, rhsList)
692+ }
693+
694+ // Not a multi-assignment, backtrack and treat as comma expression
695+ p.pos = startPos
696+ p.current = startCurrent
697+ left = p.parseTernaryExpr()
698+ }
699+
700+ // Regular single assignment
701 if op := p.current.Type; op >= token.Eq && op <= token.EqShr {
702- if !isLValue(left) { util.Error(p.current, "Invalid target for assignment") }
703+ if !isLValue(left) {
704+ util.Error(p.current, "Invalid target for assignment")
705+ }
706 tok := p.current
707 p.advance()
708 right := p.parseAssignmentExpr()
709@@ -950,10 +1175,14 @@ func (p *Parser) parseTernaryExpr() *ast.Node {
710 func (p *Parser) parseBinaryExpr(minPrec int) *ast.Node {
711 left := p.parseUnaryExpr()
712 for {
713- if left == nil { return nil }
714+ if left == nil {
715+ return nil
716+ }
717 op := p.current.Type
718 prec := getBinaryOpPrecedence(op)
719- if prec < minPrec { break }
720+ if prec < minPrec {
721+ break
722+ }
723 opTok := p.current
724 p.advance()
725 right := p.parseBinaryExpr(prec + 1)
726@@ -968,9 +1197,13 @@ func (p *Parser) parseUnaryExpr() *ast.Node {
727 op, opToken := p.previous.Type, p.previous
728 operand := p.parseUnaryExpr()
729
730- if op == token.Star { return ast.NewIndirection(tok, operand) }
731+ if op == token.Star {
732+ return ast.NewIndirection(tok, operand)
733+ }
734 if op == token.And {
735- if !isLValue(operand) { util.Error(opToken, "Address-of operator '&' requires an l-value") }
736+ if !isLValue(operand) {
737+ util.Error(opToken, "Address-of operator '&' requires an l-value")
738+ }
739 return ast.NewAddressOf(tok, operand)
740 }
741 if (op == token.Inc || op == token.Dec) && !isLValue(operand) {
742@@ -984,14 +1217,18 @@ func (p *Parser) parseUnaryExpr() *ast.Node {
743 func (p *Parser) parsePostfixExpr() *ast.Node {
744 expr := p.parsePrimaryExpr()
745 for {
746- if expr == nil { return nil }
747+ if expr == nil {
748+ return nil
749+ }
750 tok := p.current
751 if p.match(token.LParen) {
752 var args []*ast.Node
753 if !p.check(token.RParen) {
754 for {
755 args = append(args, p.parseAssignmentExpr())
756- if !p.match(token.Comma) { break }
757+ if !p.match(token.Comma) {
758+ break
759+ }
760 }
761 }
762 p.expect(token.RParen, "Expected ')' after function arguments")
763@@ -1005,7 +1242,9 @@ func (p *Parser) parsePostfixExpr() *ast.Node {
764 member := ast.NewIdent(p.previous, p.previous.Value)
765 expr = ast.NewMemberAccess(tok, expr, member)
766 } else if p.match(token.Inc, token.Dec) {
767- if !isLValue(expr) { util.Error(p.previous, "Postfix '++' or '--' requires an l-value") }
768+ if !isLValue(expr) {
769+ util.Error(p.previous, "Postfix '++' or '--' requires an l-value")
770+ }
771 expr = ast.NewPostfixOp(p.previous, p.previous.Type, expr)
772 } else {
773 break
774@@ -1025,28 +1264,52 @@ func (p *Parser) parseStructLiteral(typeNode *ast.Node) *ast.Node {
775 for !p.check(token.RBrace) && !p.check(token.EOF) {
776 if p.check(token.Ident) && p.peek().Type == token.Colon {
777 hasNames = true
778- if hasPositional { util.Error(p.current, "Cannot mix named and positional fields in struct literal") }
779+ if hasPositional {
780+ util.Error(p.current, "Cannot mix named and positional fields in struct literal")
781+ }
782 p.expect(token.Ident, "Expected field name")
783 names = append(names, ast.NewIdent(p.previous, p.previous.Value))
784 p.expect(token.Colon, "Expected ':' after field name")
785 values = append(values, p.parseAssignmentExpr())
786 } else {
787 hasPositional = true
788- if hasNames { util.Error(p.current, "Cannot mix named and positional fields in struct literal") }
789+ if hasNames {
790+ util.Error(p.current, "Cannot mix named and positional fields in struct literal")
791+ }
792 names = append(names, nil)
793 values = append(values, p.parseAssignmentExpr())
794 }
795
796- if !p.match(token.Comma) { break }
797+ if !p.match(token.Comma) {
798+ break
799+ }
800 }
801
802 p.expect(token.RBrace, "Expected '}' to close struct literal")
803
804- if hasPositional && !hasNames { names = nil }
805+ if hasPositional && !hasNames {
806+ names = nil
807+ }
808
809 return ast.NewStructLiteral(startTok, typeNode, values, names)
810 }
811
812+func (p *Parser) parseArrayLiteral(startTok token.Token, elemType *ast.BxType) *ast.Node {
813+ p.expect(token.LBrace, "Expected '{' for array literal")
814+
815+ var values []*ast.Node
816+ for !p.check(token.RBrace) && !p.check(token.EOF) {
817+ values = append(values, p.parseAssignmentExpr())
818+ if !p.match(token.Comma) {
819+ break
820+ }
821+ }
822+
823+ p.expect(token.RBrace, "Expected '}' to close array literal")
824+
825+ return ast.NewArrayLiteral(startTok, elemType, values)
826+}
827+
828 func (p *Parser) parsePrimaryExpr() *ast.Node {
829 tok := p.current
830 if p.match(token.Number) {
831@@ -1054,7 +1317,9 @@ func (p *Parser) parsePrimaryExpr() *ast.Node {
832 val, err := strconv.ParseInt(valStr, 0, 64)
833 if err != nil {
834 uval, uerr := strconv.ParseUint(valStr, 0, 64)
835- if uerr != nil { util.Error(tok, "Invalid integer literal: %s", valStr) }
836+ if uerr != nil {
837+ util.Error(tok, "Invalid integer literal: %s", valStr)
838+ }
839 val = int64(uval)
840 }
841 return ast.NewNumber(tok, val)
842@@ -1063,8 +1328,12 @@ func (p *Parser) parsePrimaryExpr() *ast.Node {
843 val, _ := strconv.ParseFloat(p.previous.Value, 64)
844 return ast.NewFloatNumber(tok, val)
845 }
846- if p.match(token.String) { return ast.NewString(tok, p.previous.Value) }
847- if p.match(token.Nil) { return ast.NewNil(tok) }
848+ if p.match(token.String) {
849+ return ast.NewString(tok, p.previous.Value)
850+ }
851+ if p.match(token.Nil) {
852+ return ast.NewNil(tok)
853+ }
854 if p.match(token.Null) {
855 util.Warn(p.cfg, config.WarnExtra, tok, "Use of 'null' is discouraged, prefer 'nil' for idiomatic Bx code")
856 return ast.NewNil(tok)
857@@ -1088,8 +1357,15 @@ func (p *Parser) parsePrimaryExpr() *ast.Node {
858 util.Warn(p.cfg, config.WarnExtra, p.previous, "Using keyword 'type' as an identifier")
859 return ast.NewIdent(tok, "type")
860 }
861+ if p.match(token.TypeOf) {
862+ p.expect(token.LParen, "Expected '(' after 'typeof'")
863+ expr := p.parseExpr()
864+ p.expect(token.RParen, "Expected ')' after typeof expression")
865+ return ast.NewTypeOf(tok, expr)
866+ }
867 if p.match(token.LParen) {
868- if p.isTypedPass && (p.isBuiltinType(p.current) || p.isTypeName(p.current.Value)) {
869+ // Only allow C-style casts for pointer types, not simple scalar types
870+ if p.isTypedPass && p.isPointerCastAhead() {
871 castType := p.parseType()
872 p.expect(token.RParen, "Expected ')' after type in cast")
873 exprToCast := p.parseUnaryExpr()
874@@ -1111,8 +1387,60 @@ func (p *Parser) parsePrimaryExpr() *ast.Node {
875 p.current = p.previous
876 }
877
878+ // Handle array literals: []type{ ... }
879+ if p.isTypedPass && p.match(token.LBracket) {
880+ arrayTok := p.previous
881+ p.expect(token.RBracket, "Expected ']' for array literal")
882+ if p.isBuiltinType(p.current) || p.isTypeName(p.current.Value) || p.check(token.Star) {
883+ elemType := p.parseType()
884+ if elemType != nil && p.check(token.LBrace) {
885+ return p.parseArrayLiteral(arrayTok, elemType)
886+ }
887+ }
888+ // Not a valid array literal: report an error (no backtracking is performed here)
889+ util.Error(arrayTok, "Expected type after '[]' for array literal")
890+ return nil
891+ }
892+
893 if !p.check(token.EOF) && !p.check(token.RBrace) && !p.check(token.Semi) {
894 util.Error(tok, "Expected an expression")
895 }
896 return nil
897 }
898+
899+// typeFromName converts a type name string to a BxType
900+func (p *Parser) typeFromName(name string) *ast.BxType {
901+ // Check if it's a built-in type keyword
902+ if tokType, isKeyword := token.KeywordMap[name]; isKeyword {
903+ if tokType == token.Void {
904+ return ast.TypeVoid
905+ } else if tokType == token.StringKeyword {
906+ return ast.TypeString
907+ } else if tokType >= token.Float && tokType <= token.Float64 {
908+ return &ast.BxType{Kind: ast.TYPE_FLOAT, Name: name}
909+ } else if tokType >= token.Byte && tokType <= token.Any {
910+ return &ast.BxType{Kind: ast.TYPE_PRIMITIVE, Name: name}
911+ }
912+ }
913+
914+ // Check if it's a user-defined type name
915+ if p.isTypeName(name) {
916+ return &ast.BxType{Kind: ast.TYPE_PRIMITIVE, Name: name}
917+ }
918+
919+ return nil
920+}
921+
922+// parseTypedExtrnDecl parses typed external function declarations like "tm extrn localtime;"
923+func (p *Parser) parseTypedExtrnDecl(typeTok token.Token, returnType *ast.BxType) *ast.Node {
924+ var names []*ast.Node
925+ for {
926+ p.expect(token.Ident, "Expected identifier in typed 'extrn' declaration")
927+ names = append(names, ast.NewIdent(p.previous, p.previous.Value))
928+ if !p.match(token.Comma) {
929+ break
930+ }
931+ }
932+ p.expect(token.Semi, "Expected ';' after typed 'extrn' declaration")
933+ return ast.NewExtrnDecl(typeTok, names, returnType)
934+}
+3,
-1
1@@ -47,6 +47,7 @@ const (
2 Float64
3 StringKeyword
4 Any
5+ TypeOf
6 LParen
7 RParen
8 LBrace
9@@ -143,9 +144,10 @@ var KeywordMap = map[string]Type{
10 "float64": Float64,
11 "string": StringKeyword,
12 "any": Any,
13+ "typeof": TypeOf,
14 }
15
16-// Reverse mapping from Type to the keyword string.
17+// Reverse mapping from Type to the keyword string
18 var TypeStrings = make(map[Type]string)
19
20 func init() {
+546,
-180
1@@ -2,10 +2,10 @@ package typeChecker
2
3 import (
4 "fmt"
5- "strings"
6
7 "github.com/xplshn/gbc/pkg/ast"
8 "github.com/xplshn/gbc/pkg/config"
9+ "github.com/xplshn/gbc/pkg/ir"
10 "github.com/xplshn/gbc/pkg/token"
11 "github.com/xplshn/gbc/pkg/util"
12 )
13@@ -19,10 +19,7 @@ type Symbol struct {
14 Next *Symbol
15 }
16
17-type Scope struct {
18- Symbols *Symbol
19- Parent *Scope
20-}
21+type Scope struct { Symbols *Symbol; Parent *Scope }
22
23 type TypeChecker struct {
24 currentScope *Scope
25@@ -45,18 +42,16 @@ func NewTypeChecker(cfg *config.Config) *TypeChecker {
26 }
27
28 func newScope(parent *Scope) *Scope { return &Scope{Parent: parent} }
29-func (tc *TypeChecker) enterScope() { tc.currentScope = newScope(tc.currentScope) }
30+func (tc *TypeChecker) enterScope() { tc.currentScope = newScope(tc.currentScope) }
31 func (tc *TypeChecker) exitScope() {
32- if tc.currentScope.Parent != nil {
33- tc.currentScope = tc.currentScope.Parent
34- }
35+ if tc.currentScope.Parent != nil { tc.currentScope = tc.currentScope.Parent }
36 }
37
38 func (tc *TypeChecker) typeErrorOrWarn(tok token.Token, format string, args ...interface{}) {
39- if tc.cfg.IsFeatureEnabled(config.FeatStrictTypes) {
40+ if !tc.cfg.IsFeatureEnabled(config.FeatPromTypes) {
41 util.Error(tok, format, args...)
42 } else {
43- util.Warn(tc.cfg, config.WarnType, tok, format, args...)
44+ util.Warn(tc.cfg, config.WarnPromTypes, tok, format, args...)
45 }
46 }
47
48@@ -66,12 +61,9 @@ func (tc *TypeChecker) addSymbol(node *ast.Node) *Symbol {
49 isFunc, isType := false, false
50
51 switch d := node.Data.(type) {
52- case ast.VarDeclNode:
53- name, typ = d.Name, d.Type
54- case ast.FuncDeclNode:
55- name, typ, isFunc = d.Name, d.ReturnType, true
56- case ast.TypeDeclNode:
57- name, typ, isType = d.Name, d.Type, true
58+ case ast.VarDeclNode: name, typ = d.Name, d.Type
59+ case ast.FuncDeclNode: name, typ, isFunc = d.Name, d.ReturnType, true
60+ case ast.TypeDeclNode: name, typ, isType = d.Name, d.Type, true
61 case ast.EnumDeclNode:
62 name, isType = d.Name, true
63 typ = &ast.BxType{Kind: ast.TYPE_ENUM, Name: d.Name, EnumMembers: d.Members, Base: ast.TypeInt}
64@@ -90,13 +82,14 @@ func (tc *TypeChecker) addSymbol(node *ast.Node) *Symbol {
65 for _, nameNode := range d.Names {
66 ident := nameNode.Data.(ast.IdentNode)
67 if tc.findSymbol(ident.Name, false) == nil {
68- sym := &Symbol{Name: ident.Name, Type: ast.TypeUntyped, IsFunc: true, Node: node, Next: tc.currentScope.Symbols}
69+ symbolType := ast.TypeUntyped
70+ if d.ReturnType != nil { symbolType = d.ReturnType }
71+ sym := &Symbol{Name: ident.Name, Type: symbolType, IsFunc: true, Node: node, Next: tc.currentScope.Symbols}
72 tc.currentScope.Symbols = sym
73 }
74 }
75 return nil
76- case ast.IdentNode:
77- name, typ = d.Name, ast.TypeUntyped
78+ case ast.IdentNode: name, typ = d.Name, ast.TypeUntyped
79 default:
80 return nil
81 }
82@@ -105,11 +98,9 @@ func (tc *TypeChecker) addSymbol(node *ast.Node) *Symbol {
83
84 if existing := tc.findSymbol(name, isType); existing != nil && tc.currentScope == tc.globalScope {
85 isExistingExtrn := existing.Node != nil && existing.Node.Type == ast.ExtrnDecl
86- if isExistingExtrn || (existing.IsFunc && !isFunc && existing.Type.Kind == ast.TYPE_UNTYPED) {
87- existing.Type, existing.IsFunc, existing.IsType, existing.Node = typ, isFunc, isType, node
88- return existing
89+ if !isExistingExtrn && !(existing.IsFunc && !isFunc && existing.Type.Kind == ast.TYPE_UNTYPED) {
90+ util.Error(node.Tok, "Redefinition of '%s'", name)
91 }
92- util.Error(node.Tok, "Redefinition of '%s'", name)
93 existing.Type, existing.IsFunc, existing.IsType, existing.Node = typ, isFunc, isType, node
94 return existing
95 }
96@@ -120,36 +111,59 @@ func (tc *TypeChecker) addSymbol(node *ast.Node) *Symbol {
97 }
98
99 func (tc *TypeChecker) findSymbol(name string, findTypes bool) *Symbol {
100+ return tc.findSymbolInScopes(name, findTypes, false)
101+}
102+
103+func (tc *TypeChecker) findSymbolInCurrentScope(name string, findTypes bool) *Symbol {
104+ return tc.findSymbolInScopes(name, findTypes, true)
105+}
106+
107+func (tc *TypeChecker) findSymbolInScopes(name string, findTypes, currentOnly bool) *Symbol {
108 for s := tc.currentScope; s != nil; s = s.Parent {
109 for sym := s.Symbols; sym != nil; sym = sym.Next {
110 if sym.Name == name && sym.IsType == findTypes {
111 return sym
112 }
113 }
114+ if currentOnly {
115+ break
116+ }
117 }
118 return nil
119 }
120
121 func (tc *TypeChecker) getAlignof(typ *ast.BxType) int64 {
122- if typ == nil { return int64(tc.wordSize) }
123+ if typ == nil {
124+ return int64(tc.wordSize)
125+ }
126
127 if (typ.Kind == ast.TYPE_PRIMITIVE || typ.Kind == ast.TYPE_STRUCT) && typ.Name != "" {
128 if sym := tc.findSymbol(typ.Name, true); sym != nil {
129- if sym.Type != typ { return tc.getAlignof(sym.Type) }
130+ if sym.Type != typ {
131+ return tc.getAlignof(sym.Type)
132+ }
133 }
134 }
135
136- if typ.Kind == ast.TYPE_UNTYPED { return int64(tc.wordSize) }
137+ if typ.Kind == ast.TYPE_UNTYPED {
138+ return int64(tc.wordSize)
139+ }
140 switch typ.Kind {
141- case ast.TYPE_VOID: return 1
142- case ast.TYPE_POINTER: return int64(tc.wordSize)
143- case ast.TYPE_ARRAY: return tc.getAlignof(typ.Base)
144- case ast.TYPE_PRIMITIVE, ast.TYPE_FLOAT, ast.TYPE_ENUM: return tc.getSizeof(typ)
145+ case ast.TYPE_VOID:
146+ return 1
147+ case ast.TYPE_POINTER:
148+ return int64(tc.wordSize)
149+ case ast.TYPE_ARRAY:
150+ return tc.getAlignof(typ.Base)
151+ case ast.TYPE_PRIMITIVE, ast.TYPE_FLOAT, ast.TYPE_ENUM:
152+ return tc.getSizeof(typ)
153 case ast.TYPE_STRUCT:
154 var maxAlign int64 = 1
155 for _, field := range typ.Fields {
156 fieldAlign := tc.getAlignof(field.Data.(ast.VarDeclNode).Type)
157- if fieldAlign > maxAlign { maxAlign = fieldAlign }
158+ if fieldAlign > maxAlign {
159+ maxAlign = fieldAlign
160+ }
161 }
162 return maxAlign
163 }
164@@ -157,10 +171,14 @@ func (tc *TypeChecker) getAlignof(typ *ast.BxType) int64 {
165 }
166
167 func (tc *TypeChecker) getSizeof(typ *ast.BxType) int64 {
168- if typ == nil || typ.Kind == ast.TYPE_UNTYPED { return int64(tc.wordSize) }
169+ if typ == nil || typ.Kind == ast.TYPE_UNTYPED {
170+ return int64(tc.wordSize)
171+ }
172 switch typ.Kind {
173- case ast.TYPE_VOID: return 0
174- case ast.TYPE_POINTER: return int64(tc.wordSize)
175+ case ast.TYPE_VOID:
176+ return 0
177+ case ast.TYPE_POINTER:
178+ return int64(tc.wordSize)
179 case ast.TYPE_ARRAY:
180 elemSize := tc.getSizeof(typ.Base)
181 var arrayLen int64 = 1
182@@ -172,54 +190,67 @@ func (tc *TypeChecker) getSizeof(typ *ast.BxType) int64 {
183 }
184 }
185 return elemSize * arrayLen
186- case ast.TYPE_PRIMITIVE, ast.TYPE_UNTYPED_INT:
187- switch typ.Name {
188- case "int", "uint", "string": return int64(tc.wordSize)
189- case "int64", "uint64": return 8
190- case "int32", "uint32": return 4
191- case "int16", "uint16": return 2
192- case "byte", "bool", "int8", "uint8": return 1
193- default:
194- if sym := tc.findSymbol(typ.Name, true); sym != nil { return tc.getSizeof(sym.Type) }
195- return int64(tc.wordSize)
196+ case ast.TYPE_PRIMITIVE, ast.TYPE_LITERAL_INT:
197+ resolver := ir.NewTypeSizeResolver(tc.wordSize)
198+ if size := resolver.GetTypeSize(typ.Name); size > 0 {
199+ return size
200 }
201- case ast.TYPE_ENUM: return tc.getSizeof(ast.TypeInt)
202- case ast.TYPE_FLOAT, ast.TYPE_UNTYPED_FLOAT:
203- switch typ.Name {
204- case "float", "float32": return 4
205- case "float64": return 8
206- default: return 4
207+ // Fallback for user-defined types
208+ if sym := tc.findSymbol(typ.Name, true); sym != nil {
209+ return tc.getSizeof(sym.Type)
210 }
211+ return int64(tc.wordSize)
212+ case ast.TYPE_ENUM:
213+ return tc.getSizeof(ast.TypeInt)
214+ case ast.TYPE_FLOAT, ast.TYPE_LITERAL_FLOAT:
215+ resolver := ir.NewTypeSizeResolver(tc.wordSize)
216+ return resolver.GetTypeSize(typ.Name)
217 case ast.TYPE_STRUCT:
218 var totalSize, maxAlign int64 = 0, 1
219 for _, field := range typ.Fields {
220 fieldData := field.Data.(ast.VarDeclNode)
221 fieldAlign := tc.getAlignof(fieldData.Type)
222- if fieldAlign > maxAlign { maxAlign = fieldAlign }
223+ if fieldAlign > maxAlign {
224+ maxAlign = fieldAlign
225+ }
226 totalSize = util.AlignUp(totalSize, fieldAlign)
227 totalSize += tc.getSizeof(fieldData.Type)
228 }
229- if maxAlign == 0 { maxAlign = 1 }
230+ if maxAlign == 0 {
231+ maxAlign = 1
232+ }
233 return util.AlignUp(totalSize, maxAlign)
234 }
235 return int64(tc.wordSize)
236 }
237
238 func (tc *TypeChecker) Check(root *ast.Node) {
239- if !tc.cfg.IsFeatureEnabled(config.FeatTyped) { return }
240+ if !tc.cfg.IsFeatureEnabled(config.FeatTyped) {
241+ return
242+ }
243 tc.collectGlobals(root)
244 tc.checkNode(root)
245 tc.annotateGlobalDecls(root)
246 }
247
248 func (tc *TypeChecker) collectGlobals(node *ast.Node) {
249- if node == nil || node.Type != ast.Block { return }
250+ if node == nil || node.Type != ast.Block {
251+ return
252+ }
253 for _, stmt := range node.Data.(ast.BlockNode).Stmts {
254 switch stmt.Type {
255- case ast.VarDecl, ast.FuncDecl, ast.ExtrnDecl, ast.TypeDecl, ast.EnumDecl:
256+ case ast.VarDecl:
257+ if stmt.Data.(ast.VarDeclNode).IsDefine {
258+ continue
259+ }
260+ tc.addSymbol(stmt)
261+ case ast.FuncDecl, ast.ExtrnDecl, ast.TypeDecl, ast.EnumDecl:
262 tc.addSymbol(stmt)
263 case ast.MultiVarDecl:
264 for _, subStmt := range stmt.Data.(ast.MultiVarDeclNode).Decls {
265+ if subStmt.Data.(ast.VarDeclNode).IsDefine {
266+ continue
267+ }
268 tc.addSymbol(subStmt)
269 }
270 }
271@@ -227,11 +258,15 @@ func (tc *TypeChecker) collectGlobals(node *ast.Node) {
272 }
273
274 func (tc *TypeChecker) annotateGlobalDecls(root *ast.Node) {
275- if root == nil || root.Type != ast.Block { return }
276+ if root == nil || root.Type != ast.Block {
277+ return
278+ }
279 for _, stmt := range root.Data.(ast.BlockNode).Stmts {
280 if stmt.Type == ast.VarDecl {
281 d, ok := stmt.Data.(ast.VarDeclNode)
282- if !ok { continue }
283+ if !ok {
284+ continue
285+ }
286 if globalSym := tc.findSymbol(d.Name, false); globalSym != nil {
287 if (d.Type == nil || d.Type.Kind == ast.TYPE_UNTYPED) && (globalSym.Type != nil && globalSym.Type.Kind != ast.TYPE_UNTYPED) {
288 d.Type = globalSym.Type
289@@ -243,15 +278,21 @@ func (tc *TypeChecker) annotateGlobalDecls(root *ast.Node) {
290 }
291
292 func (tc *TypeChecker) checkNode(node *ast.Node) {
293- if node == nil { return }
294+ if node == nil {
295+ return
296+ }
297 switch node.Type {
298 case ast.Block:
299 d := node.Data.(ast.BlockNode)
300- if !d.IsSynthetic { tc.enterScope() }
301+ if !d.IsSynthetic {
302+ tc.enterScope()
303+ }
304 for _, stmt := range d.Stmts {
305 tc.checkNode(stmt)
306 }
307- if !d.IsSynthetic { tc.exitScope() }
308+ if !d.IsSynthetic {
309+ tc.exitScope()
310+ }
311 case ast.FuncDecl:
312 tc.checkFuncDecl(node)
313 case ast.VarDecl:
314@@ -296,7 +337,9 @@ func (tc *TypeChecker) checkNode(node *ast.Node) {
315
316 func (tc *TypeChecker) checkFuncDecl(node *ast.Node) {
317 d := node.Data.(ast.FuncDeclNode)
318- if d.Body == nil || d.Body.Type == ast.AsmStmt { return }
319+ if d.Body == nil || d.Body.Type == ast.AsmStmt {
320+ return
321+ }
322 prevFunc := tc.currentFunc
323 tc.currentFunc = &d
324 defer func() { tc.currentFunc = prevFunc }()
325@@ -310,10 +353,16 @@ func (tc *TypeChecker) checkFuncDecl(node *ast.Node) {
326
327 func (tc *TypeChecker) checkVarDecl(node *ast.Node) {
328 d := node.Data.(ast.VarDeclNode)
329- if d.IsDefine && tc.findSymbol(d.Name, false) != nil {
330- util.Error(node.Tok, "Trying to assign to undeclared identifier, use := or define with a explicit type or auto")
331+ if d.IsDefine {
332+ if sym := tc.findSymbolInCurrentScope(d.Name, false); sym != nil {
333+ util.Error(node.Tok, "no new variables on left side of := (redeclaration of '%s')", d.Name)
334+ } else {
335+ tc.addSymbol(node)
336+ }
337+ } else if tc.currentFunc != nil && tc.findSymbolInCurrentScope(d.Name, false) == nil {
338+ tc.addSymbol(node)
339 }
340- if tc.currentFunc != nil { tc.addSymbol(node) }
341+
342 if len(d.InitList) == 0 {
343 if (d.Type == nil || d.Type.Kind == ast.TYPE_UNTYPED) && !tc.cfg.IsFeatureEnabled(config.FeatAllowUninitialized) {
344 util.Error(node.Tok, "Uninitialized variable '%s' is not allowed in this mode", d.Name)
345@@ -323,8 +372,51 @@ func (tc *TypeChecker) checkVarDecl(node *ast.Node) {
346 }
347
348 initExpr := d.InitList[0]
349+
350+ if d.IsDefine && (d.Type == nil || d.Type.Kind == ast.TYPE_UNTYPED) {
351+ if structTypeSym := tc.findSymbol(d.Name, true); structTypeSym != nil && structTypeSym.IsType {
352+ structType := tc.resolveType(structTypeSym.Type)
353+ if structType.Kind == ast.TYPE_STRUCT {
354+ var operandExpr *ast.Node
355+ if initExpr.Type == ast.UnaryOp {
356+ unaryOp := initExpr.Data.(ast.UnaryOpNode)
357+ if unaryOp.Op == token.Star {
358+ operandExpr = unaryOp.Expr
359+ }
360+ } else if initExpr.Type == ast.Indirection {
361+ indirOp := initExpr.Data.(ast.IndirectionNode)
362+ operandExpr = indirOp.Expr
363+ }
364+
365+ if operandExpr != nil {
366+ operandType := tc.checkExpr(operandExpr)
367+ resolvedOpType := tc.resolveType(operandType)
368+ if resolvedOpType.Kind == ast.TYPE_UNTYPED || tc.isIntegerType(resolvedOpType) {
369+ promotedType := &ast.BxType{Kind: ast.TYPE_POINTER, Base: structType}
370+ operandExpr.Typ = promotedType
371+ if operandExpr.Type == ast.Ident {
372+ if sym := tc.findSymbol(operandExpr.Data.(ast.IdentNode).Name, false); sym != nil {
373+ sym.Type = promotedType
374+ }
375+ }
376+ initExpr.Typ = structType
377+ d.Type = structType
378+ node.Data = d
379+ if sym := tc.findSymbol(d.Name, false); sym != nil {
380+ sym.Type = structType
381+ }
382+ node.Typ = structType
383+ return
384+ }
385+ }
386+ }
387+ }
388+ }
389+
390 initType := tc.checkExpr(initExpr)
391- if initType == nil { return }
392+ if initType == nil {
393+ return
394+ }
395
396 if d.Type == nil || d.Type.Kind == ast.TYPE_UNTYPED {
397 d.Type = initType
398@@ -332,14 +424,21 @@ func (tc *TypeChecker) checkVarDecl(node *ast.Node) {
399 if sym := tc.findSymbol(d.Name, false); sym != nil {
400 sym.Type = initType
401 }
402- } else if initType.Kind == ast.TYPE_UNTYPED_INT || initType.Kind == ast.TYPE_UNTYPED_FLOAT {
403+ if tc.cfg.IsWarningEnabled(config.WarnDebugComp) {
404+ if d.IsDefine {
405+ util.Warn(tc.cfg, config.WarnDebugComp, node.Tok, "Guessing (:=) is %s of type: '%s'", d.Name, ast.TypeToString(initType))
406+ } else {
407+ util.Warn(tc.cfg, config.WarnDebugComp, node.Tok, "Guessing (auto) is %s of type: '%s'", d.Name, ast.TypeToString(initType))
408+ }
409+ }
410+ } else if initType.Kind == ast.TYPE_LITERAL_INT || initType.Kind == ast.TYPE_LITERAL_FLOAT {
411 if tc.isNumericType(d.Type) || d.Type.Kind == ast.TYPE_POINTER || d.Type.Kind == ast.TYPE_BOOL {
412 initExpr.Typ = d.Type
413 initType = d.Type
414 }
415 }
416 if !tc.areTypesCompatible(d.Type, initType, initExpr) {
417- tc.typeErrorOrWarn(node.Tok, "Initializing variable of type '%s' with expression of incompatible type '%s'", typeToString(d.Type), typeToString(initType))
418+ tc.typeErrorOrWarn(node.Tok, "Initializing variable of type '%s' with expression of incompatible type '%s'", ast.TypeToString(d.Type), ast.TypeToString(initType))
419 }
420 node.Typ = d.Type
421 }
422@@ -347,7 +446,9 @@ func (tc *TypeChecker) checkVarDecl(node *ast.Node) {
423 func (tc *TypeChecker) isSymbolLocal(name string) bool {
424 for s := tc.currentScope; s != nil && s != tc.globalScope; s = s.Parent {
425 for sym := s.Symbols; sym != nil; sym = sym.Next {
426- if sym.Name == name && !sym.IsType { return true }
427+ if sym.Name == name && !sym.IsType {
428+ return true
429+ }
430 }
431 }
432 return false
433@@ -356,7 +457,9 @@ func (tc *TypeChecker) isSymbolLocal(name string) bool {
434 func (tc *TypeChecker) checkReturn(node *ast.Node) {
435 d := node.Data.(ast.ReturnNode)
436 if tc.currentFunc == nil {
437- if d.Expr != nil { util.Error(node.Tok, "Return with value used outside of a function") }
438+ if d.Expr != nil {
439+ util.Error(node.Tok, "Return with value used outside of a function")
440+ }
441 return
442 }
443
444@@ -384,33 +487,57 @@ func (tc *TypeChecker) checkReturn(node *ast.Node) {
445 retType := tc.currentFunc.ReturnType
446 if d.Expr == nil {
447 if retType.Kind != ast.TYPE_VOID {
448- util.Error(node.Tok, "Return with no value in function returning non-void type ('%s')", typeToString(retType))
449+ util.Error(node.Tok, "Return with no value in function returning non-void type ('%s')", ast.TypeToString(retType))
450 }
451 } else {
452 exprType := tc.checkExpr(d.Expr)
453 if retType.Kind == ast.TYPE_VOID {
454 util.Error(node.Tok, "Return with a value in function returning void")
455 } else if !tc.areTypesCompatible(retType, exprType, d.Expr) {
456- tc.typeErrorOrWarn(node.Tok, "Returning type '%s' is incompatible with function return type '%s'", typeToString(exprType), typeToString(retType))
457+ tc.typeErrorOrWarn(node.Tok, "Returning type '%s' is incompatible with function return type '%s'", ast.TypeToString(exprType), ast.TypeToString(retType))
458 }
459 }
460 }
461
462 func (tc *TypeChecker) checkExprAsCondition(node *ast.Node) {
463 typ := tc.checkExpr(node)
464- if !(tc.isScalarType(typ) || typ.Kind == ast.TYPE_UNTYPED || typ.Kind == ast.TYPE_UNTYPED_INT) {
465- util.Warn(tc.cfg, config.WarnType, node.Tok, "Expression of type '%s' used as a condition", typeToString(typ))
466+ if !(tc.isScalarType(typ) || typ.Kind == ast.TYPE_UNTYPED || typ.Kind == ast.TYPE_LITERAL_INT) {
467+ util.Warn(tc.cfg, config.WarnPromTypes, node.Tok, "Expression of type '%s' used as a condition", ast.TypeToString(typ))
468 }
469 }
470
471 func (tc *TypeChecker) checkExpr(node *ast.Node) *ast.BxType {
472- if node == nil { return ast.TypeUntyped }
473- if node.Typ != nil && node.Typ.Kind != ast.TYPE_UNTYPED_INT && node.Typ.Kind != ast.TYPE_UNTYPED_FLOAT {
474+ if node == nil {
475+ return ast.TypeUntyped
476+ }
477+ if node.Typ != nil && node.Typ.Kind != ast.TYPE_LITERAL_INT && node.Typ.Kind != ast.TYPE_LITERAL_FLOAT {
478 return node.Typ
479 }
480 var typ *ast.BxType
481 switch d := node.Data.(type) {
482 case ast.AssignNode:
483+ if d.Op == token.Define {
484+ if d.Lhs.Type != ast.Ident {
485+ util.Error(node.Tok, "Cannot declare non-identifier with ':='")
486+ typ = ast.TypeUntyped
487+ break
488+ }
489+ name := d.Lhs.Data.(ast.IdentNode).Name
490+ if sym := tc.findSymbolInCurrentScope(name, false); sym != nil {
491+ util.Error(node.Tok, "no new variables on left side of := (redeclaration of '%s')", name)
492+ typ = sym.Type
493+ break
494+ }
495+
496+ rhsType := tc.checkExpr(d.Rhs)
497+ varDeclNode := ast.NewVarDecl(d.Lhs.Tok, name, rhsType, []*ast.Node{d.Rhs}, nil, false, false, true)
498+ tc.addSymbol(varDeclNode)
499+ node.Type = ast.VarDecl
500+ node.Data = varDeclNode.Data
501+ node.Typ = rhsType
502+ return rhsType
503+ }
504+
505 lhsType, rhsType := tc.checkExpr(d.Lhs), tc.checkExpr(d.Rhs)
506
507 isLhsScalar := tc.isScalarType(lhsType) && lhsType.Kind != ast.TYPE_POINTER
508@@ -442,8 +569,8 @@ func (tc *TypeChecker) checkExpr(node *ast.Node) *ast.BxType {
509 }
510 }
511 if !tc.areTypesCompatible(lhsType, rhsType, d.Rhs) {
512- tc.typeErrorOrWarn(node.Tok, "Assigning to type '%s' from incompatible type '%s'", typeToString(lhsType), typeToString(rhsType))
513- } else if rhsType.Kind == ast.TYPE_UNTYPED_INT || rhsType.Kind == ast.TYPE_UNTYPED_FLOAT {
514+ tc.typeErrorOrWarn(node.Tok, "Assigning to type '%s' from incompatible type '%s'", ast.TypeToString(lhsType), ast.TypeToString(rhsType))
515+ } else if rhsType.Kind == ast.TYPE_LITERAL_INT || rhsType.Kind == ast.TYPE_LITERAL_FLOAT {
516 if tc.isNumericType(lhsType) || lhsType.Kind == ast.TYPE_POINTER || lhsType.Kind == ast.TYPE_BOOL {
517 d.Rhs.Typ = lhsType
518 }
519@@ -456,6 +583,7 @@ func (tc *TypeChecker) checkExpr(node *ast.Node) *ast.BxType {
520 operandType := tc.checkExpr(d.Expr)
521 switch d.Op {
522 case token.Star:
523+ operandType := tc.checkExpr(d.Expr)
524 resolvedOpType := tc.resolveType(operandType)
525 if resolvedOpType.Kind == ast.TYPE_POINTER || resolvedOpType.Kind == ast.TYPE_ARRAY {
526 typ = resolvedOpType.Base
527@@ -471,7 +599,7 @@ func (tc *TypeChecker) checkExpr(node *ast.Node) *ast.BxType {
528 }
529 typ = promotedType.Base
530 } else {
531- util.Error(node.Tok, "Cannot dereference non-pointer type '%s'", typeToString(operandType))
532+ util.Error(node.Tok, "Cannot dereference non-pointer type '%s'", ast.TypeToString(operandType))
533 typ = ast.TypeUntyped
534 }
535 case token.And:
536@@ -485,7 +613,7 @@ func (tc *TypeChecker) checkExpr(node *ast.Node) *ast.BxType {
537 tc.checkExprAsCondition(d.Cond)
538 thenType, elseType := tc.checkExpr(d.ThenExpr), tc.checkExpr(d.ElseExpr)
539 if !tc.areTypesCompatible(thenType, elseType, d.ElseExpr) {
540- tc.typeErrorOrWarn(node.Tok, "Type mismatch in ternary expression branches ('%s' vs '%s')", typeToString(thenType), typeToString(elseType))
541+ tc.typeErrorOrWarn(node.Tok, "Type mismatch in ternary expression branches ('%s' vs '%s')", ast.TypeToString(thenType), ast.TypeToString(elseType))
542 }
543 if thenType != nil && thenType.Kind == ast.TYPE_POINTER {
544 typ = thenType
545@@ -496,8 +624,8 @@ func (tc *TypeChecker) checkExpr(node *ast.Node) *ast.BxType {
546 }
547 case ast.SubscriptNode:
548 arrayType, indexType := tc.checkExpr(d.Array), tc.checkExpr(d.Index)
549- if !tc.isIntegerType(indexType) && indexType.Kind != ast.TYPE_UNTYPED && indexType.Kind != ast.TYPE_UNTYPED_INT {
550- tc.typeErrorOrWarn(d.Index.Tok, "Array subscript is not an integer type ('%s')", typeToString(indexType))
551+ if !tc.isIntegerType(indexType) && indexType.Kind != ast.TYPE_UNTYPED && indexType.Kind != ast.TYPE_LITERAL_INT && indexType.Kind != ast.TYPE_ENUM {
552+ tc.typeErrorOrWarn(d.Index.Tok, "Array subscript is not an integer type ('%s')", ast.TypeToString(indexType))
553 }
554 resolvedArrayType := tc.resolveType(arrayType)
555 if resolvedArrayType.Kind == ast.TYPE_ARRAY || resolvedArrayType.Kind == ast.TYPE_POINTER {
556@@ -515,22 +643,49 @@ func (tc *TypeChecker) checkExpr(node *ast.Node) *ast.BxType {
557 }
558 typ = promotedType.Base
559 } else {
560- util.Error(node.Tok, "Cannot subscript non-array/pointer type '%s'", typeToString(arrayType))
561+ util.Error(node.Tok, "Cannot subscript non-array/pointer type '%s'", ast.TypeToString(arrayType))
562 typ = ast.TypeUntyped
563 }
564 case ast.MemberAccessNode:
565 typ = tc.checkMemberAccess(node)
566 case ast.FuncCallNode:
567 typ = tc.checkFuncCall(node)
568+ case ast.IndirectionNode:
569+ // Handle indirection (dereferencing) operations
570+ indirData := node.Data.(ast.IndirectionNode)
571+ operandType := tc.checkExpr(indirData.Expr)
572+ resolvedOpType := tc.resolveType(operandType)
573+ if resolvedOpType.Kind == ast.TYPE_POINTER || resolvedOpType.Kind == ast.TYPE_ARRAY {
574+ typ = resolvedOpType.Base
575+ } else if resolvedOpType.Kind == ast.TYPE_UNTYPED || tc.isIntegerType(resolvedOpType) {
576+ promotedType := &ast.BxType{Kind: ast.TYPE_POINTER, Base: ast.TypeUntyped}
577+ indirData.Expr.Typ = promotedType
578+ if indirData.Expr.Type == ast.Ident {
579+ if sym := tc.findSymbol(indirData.Expr.Data.(ast.IdentNode).Name, false); sym != nil {
580+ if sym.Type == nil || sym.Type.Kind == ast.TYPE_UNTYPED || tc.isIntegerType(sym.Type) {
581+ sym.Type = promotedType
582+ }
583+ }
584+ }
585+ typ = promotedType.Base
586+ } else {
587+ util.Error(node.Tok, "Cannot dereference non-pointer type '%s'", ast.TypeToString(operandType))
588+ typ = ast.TypeUntyped
589+ }
590 case ast.TypeCastNode:
591 tc.checkExpr(d.Expr)
592 typ = d.TargetType
593+ case ast.TypeOfNode:
594+ tc.checkExpr(d.Expr)
595+ typ = ast.TypeString // typeOf always returns a string
596 case ast.StructLiteralNode:
597 typ = tc.checkStructLiteral(node)
598+ case ast.ArrayLiteralNode:
599+ typ = tc.checkArrayLiteral(node)
600 case ast.NumberNode:
601- typ = ast.TypeUntypedInt
602+ typ = ast.TypeLiteralInt
603 case ast.FloatNumberNode:
604- typ = ast.TypeUntypedFloat
605+ typ = ast.TypeLiteralFloat
606 case ast.StringNode:
607 typ = ast.TypeString
608 case ast.NilNode:
609@@ -582,7 +737,9 @@ func (tc *TypeChecker) checkMemberAccess(node *ast.Node) *ast.BxType {
610
611 baseType := tc.resolveType(exprType)
612
613- if baseType != nil && baseType.Kind == ast.TYPE_POINTER { baseType = baseType.Base }
614+ if baseType != nil && baseType.Kind == ast.TYPE_POINTER {
615+ baseType = baseType.Base
616+ }
617
618 resolvedStructType := tc.resolveType(baseType)
619
620@@ -605,7 +762,7 @@ func (tc *TypeChecker) checkMemberAccess(node *ast.Node) *ast.BxType {
621
622 if resolvedStructType == nil || resolvedStructType.Kind != ast.TYPE_STRUCT {
623 memberName := d.Member.Data.(ast.IdentNode).Name
624- util.Error(node.Tok, "request for member '%s' in non-struct type '%s'", memberName, typeToString(exprType))
625+ util.Error(node.Tok, "request for member '%s' in non-struct type '%s'", memberName, ast.TypeToString(exprType))
626 return ast.TypeUntyped
627 }
628
629@@ -618,18 +775,26 @@ func (tc *TypeChecker) checkMemberAccess(node *ast.Node) *ast.BxType {
630 }
631 }
632
633- util.Error(node.Tok, "no member named '%s' in struct '%s'", memberName, typeToString(resolvedStructType))
634+ util.Error(node.Tok, "no member named '%s' in struct '%s'", memberName, ast.TypeToString(resolvedStructType))
635 return ast.TypeUntyped
636 }
637
638 func (tc *TypeChecker) typeFromName(name string) *ast.BxType {
639- if sym := tc.findSymbol(name, true); sym != nil && sym.IsType { return sym.Type }
640+ if sym := tc.findSymbol(name, true); sym != nil && sym.IsType {
641+ return sym.Type
642+ }
643
644 tokType, isKeyword := token.KeywordMap[name]
645 if isKeyword && tokType >= token.Void && tokType <= token.Any {
646- if tokType == token.Void { return ast.TypeVoid }
647- if tokType == token.StringKeyword { return ast.TypeString }
648- if tokType >= token.Float && tokType <= token.Float64 { return &ast.BxType{Kind: ast.TYPE_FLOAT, Name: name} }
649+ if tokType == token.Void {
650+ return ast.TypeVoid
651+ }
652+ if tokType == token.StringKeyword {
653+ return ast.TypeString
654+ }
655+ if tokType >= token.Float && tokType <= token.Float64 {
656+ return &ast.BxType{Kind: ast.TYPE_FLOAT, Name: name}
657+ }
658 return &ast.BxType{Kind: ast.TYPE_PRIMITIVE, Name: name}
659 }
660 return nil
661@@ -651,7 +816,9 @@ func (tc *TypeChecker) checkFuncCall(node *ast.Node) *ast.BxType {
662 targetType = sym.Type
663 }
664 }
665- if targetType == nil { targetType = tc.checkExpr(arg) }
666+ if targetType == nil {
667+ targetType = tc.checkExpr(arg)
668+ }
669 if targetType == nil {
670 util.Error(arg.Tok, "Cannot determine type for sizeof argument")
671 return ast.TypeUntyped
672@@ -715,7 +882,9 @@ func (tc *TypeChecker) checkFuncCall(node *ast.Node) *ast.BxType {
673 }
674
675 resolvedType := tc.resolveType(funcExprType)
676- if resolvedType != nil && resolvedType.Kind == ast.TYPE_STRUCT { return resolvedType }
677+ if resolvedType != nil && resolvedType.Kind == ast.TYPE_STRUCT {
678+ return resolvedType
679+ }
680
681 return funcExprType
682 }
683@@ -749,7 +918,7 @@ func (tc *TypeChecker) checkStructLiteral(node *ast.Node) *ast.BxType {
684 currentFieldType := tc.resolveType(structType.Fields[i].Data.(ast.VarDeclNode).Type)
685 if !tc.areTypesEqual(firstFieldType, currentFieldType) {
686 util.Error(node.Tok, "positional struct literal for '%s' is only allowed if all fields have the same type, but found '%s' and '%s'",
687- typeIdent.Name, typeToString(firstFieldType), typeToString(currentFieldType))
688+ typeIdent.Name, ast.TypeToString(firstFieldType), ast.TypeToString(currentFieldType))
689 break
690 }
691 }
692@@ -765,11 +934,13 @@ func (tc *TypeChecker) checkStructLiteral(node *ast.Node) *ast.BxType {
693 field := structType.Fields[i].Data.(ast.VarDeclNode)
694 valType := tc.checkExpr(valNode)
695 if !tc.areTypesCompatible(field.Type, valType, valNode) {
696- tc.typeErrorOrWarn(valNode.Tok, "Initializer for field '%s' has wrong type. Expected '%s', got '%s'", field.Name, typeToString(field.Type), typeToString(valType))
697+ tc.typeErrorOrWarn(valNode.Tok, "Initializer for field '%s' has wrong type. Expected '%s', got '%s'", field.Name, ast.TypeToString(field.Type), ast.TypeToString(valType))
698 }
699 }
700 } else {
701- if len(d.Values) > len(structType.Fields) { util.Error(node.Tok, "Too many initializers for struct '%s'", typeIdent.Name) }
702+ if len(d.Values) > len(structType.Fields) {
703+ util.Error(node.Tok, "Too many initializers for struct '%s'", typeIdent.Name)
704+ }
705
706 fieldMap := make(map[string]*ast.Node)
707 for _, fieldNode := range structType.Fields {
708@@ -780,7 +951,9 @@ func (tc *TypeChecker) checkStructLiteral(node *ast.Node) *ast.BxType {
709 usedFields := make(map[string]bool)
710
711 for i, nameNode := range d.Names {
712- if nameNode == nil { continue }
713+ if nameNode == nil {
714+ continue
715+ }
716 fieldName := nameNode.Data.(ast.IdentNode).Name
717
718 if usedFields[fieldName] {
719@@ -800,7 +973,7 @@ func (tc *TypeChecker) checkStructLiteral(node *ast.Node) *ast.BxType {
720 fieldType := field.Data.(ast.VarDeclNode).Type
721
722 if !tc.areTypesCompatible(fieldType, valType, valNode) {
723- tc.typeErrorOrWarn(valNode.Tok, "Initializer for field '%s' has wrong type. Expected '%s', got '%s'", fieldName, typeToString(fieldType), typeToString(valType))
724+ tc.typeErrorOrWarn(valNode.Tok, "Initializer for field '%s' has wrong type. Expected '%s', got '%s'", fieldName, ast.TypeToString(fieldType), ast.TypeToString(valType))
725 }
726 }
727 }
728@@ -808,78 +981,298 @@ func (tc *TypeChecker) checkStructLiteral(node *ast.Node) *ast.BxType {
729 return structType
730 }
731
732+func (tc *TypeChecker) checkArrayLiteral(node *ast.Node) *ast.BxType {
733+ d := node.Data.(ast.ArrayLiteralNode)
734+
735+ // Create pointer type to element type (since array literals decay to pointers)
736+ pointerType := &ast.BxType{Kind: ast.TYPE_POINTER, Base: d.ElementType}
737+
738+ // Type check all the values
739+ for i, valueNode := range d.Values {
740+ valueType := tc.checkExpr(valueNode)
741+ if !tc.areTypesCompatible(d.ElementType, valueType, valueNode) {
742+ tc.typeErrorOrWarn(valueNode.Tok, "Array element %d has wrong type. Expected '%s', got '%s'",
743+ i, ast.TypeToString(d.ElementType), ast.TypeToString(valueType))
744+ }
745+ }
746+
747+ return pointerType
748+}
749+
750+// getNodeName extracts a meaningful string representation from an AST node for error messages
751+func (tc *TypeChecker) getNodeName(node *ast.Node) string {
752+ if node == nil {
753+ return "operand"
754+ }
755+
756+ switch node.Type {
757+ case ast.Ident:
758+ return node.Data.(ast.IdentNode).Name
759+ case ast.Number:
760+ return fmt.Sprintf("%d", node.Data.(ast.NumberNode).Value)
761+ case ast.FloatNumber:
762+ return fmt.Sprintf("%g", node.Data.(ast.FloatNumberNode).Value)
763+ case ast.String:
764+ return fmt.Sprintf("\"%s\"", node.Data.(ast.StringNode).Value)
765+ case ast.MemberAccess:
766+ // Handle member access like sp.shape_color
767+ memberData := node.Data.(ast.MemberAccessNode)
768+ exprName := tc.getNodeName(memberData.Expr)
769+ memberName := tc.getNodeName(memberData.Member)
770+ return fmt.Sprintf("%s.%s", exprName, memberName)
771+ default:
772+ return "operand"
773+ }
774+}
775+
776 func (tc *TypeChecker) getBinaryOpResultType(op token.Type, left, right *ast.BxType, tok token.Token, leftNode, rightNode *ast.Node) *ast.BxType {
777 resLeft, resRight := tc.resolveType(left), tc.resolveType(right)
778 lType, rType := resLeft, resRight
779
780- if lType.Kind == ast.TYPE_UNTYPED_INT && tc.isIntegerType(rType) {
781- if tc.getSizeof(rType) < tc.getSizeof(ast.TypeInt) {
782- lType, rType = ast.TypeInt, ast.TypeInt
783- if rightNode.Typ.Kind != ast.TYPE_UNTYPED_INT { rightNode.Typ = ast.TypeInt }
784+ // Check for explicit type mismatch (both operands are explicitly typed but different)
785+ if lType.Kind != ast.TYPE_LITERAL_INT && lType.Kind != ast.TYPE_LITERAL_FLOAT && lType.Kind != ast.TYPE_UNTYPED &&
786+ rType.Kind != ast.TYPE_LITERAL_INT && rType.Kind != ast.TYPE_LITERAL_FLOAT && rType.Kind != ast.TYPE_UNTYPED &&
787+ tc.isNumericType(lType) && tc.isNumericType(rType) && !tc.areTypesEqual(lType, rType) {
788+ // For numeric types of different sizes, emit warning and promote to larger type
789+ if tc.isIntegerType(lType) && tc.isIntegerType(rType) {
790+ if tc.cfg.IsFeatureEnabled(config.FeatPromTypes) {
791+ util.Warn(tc.cfg, config.WarnDebugComp, tok, "Variable promoted from '%s' to '%s'", ast.TypeToString(left), ast.TypeToString(right))
792+ } else {
793+ // For enum vs int mismatches, suggest casting the enum to its base type
794+ if lType.Kind == ast.TYPE_ENUM && tc.isIntegerType(rType) && rType.Kind != ast.TYPE_ENUM {
795+ leftNodeName := tc.getNodeName(leftNode)
796+ baseTypeName := ast.TypeToString(lType.Base)
797+ tc.typeErrorOrWarn(tok, "operand of type '%s' mismatches operand of type '%s', use %s(%s)", ast.TypeToString(left), ast.TypeToString(right), baseTypeName, leftNodeName)
798+ } else if rType.Kind == ast.TYPE_ENUM && tc.isIntegerType(lType) && lType.Kind != ast.TYPE_ENUM {
799+ rightNodeName := tc.getNodeName(rightNode)
800+ baseTypeName := ast.TypeToString(rType.Base)
801+ tc.typeErrorOrWarn(tok, "operand of type '%s' mismatches operand of type '%s', use %s(%s)", ast.TypeToString(left), ast.TypeToString(right), baseTypeName, rightNodeName)
802+ } else {
803+ rightNodeName := tc.getNodeName(rightNode)
804+ tc.typeErrorOrWarn(tok, "operand of type '%s' mismatches operand of type '%s', use %s(%s)", ast.TypeToString(left), ast.TypeToString(right), ast.TypeToString(right), rightNodeName)
805+ }
806+ }
807+ // Promote to larger type for integer operations
808+ if tc.getSizeof(lType) > tc.getSizeof(rType) {
809+ rType = lType
810+ rightNode.Typ = rType
811+ } else {
812+ lType = rType
813+ leftNode.Typ = lType
814+ }
815 } else {
816- lType = rType
817+ // For enum vs int mismatches, suggest casting the enum to its base type
818+ if lType.Kind == ast.TYPE_ENUM && tc.isIntegerType(rType) && rType.Kind != ast.TYPE_ENUM {
819+ leftNodeName := tc.getNodeName(leftNode)
820+ baseTypeName := ast.TypeToString(lType.Base)
821+ tc.typeErrorOrWarn(tok, "operand of type '%s' mismatches operand of type '%s', use %s(%s)", ast.TypeToString(left), ast.TypeToString(right), baseTypeName, leftNodeName)
822+ } else if rType.Kind == ast.TYPE_ENUM && tc.isIntegerType(lType) && lType.Kind != ast.TYPE_ENUM {
823+ rightNodeName := tc.getNodeName(rightNode)
824+ baseTypeName := ast.TypeToString(rType.Base)
825+ tc.typeErrorOrWarn(tok, "operand of type '%s' mismatches operand of type '%s', use %s(%s)", ast.TypeToString(left), ast.TypeToString(right), baseTypeName, rightNodeName)
826+ } else {
827+ rightNodeName := tc.getNodeName(rightNode)
828+ tc.typeErrorOrWarn(tok, "operand of type '%s' mismatches operand of type '%s', use %s(%s)", ast.TypeToString(left), ast.TypeToString(right), ast.TypeToString(right), rightNodeName)
829+ }
830+ }
831+ }
832+
833+ // Handle untyped promotion based on weak-types feature
834+ // But don't warn when both operands are the same untyped type
835+ bothUntypedInt := left.Kind == ast.TYPE_LITERAL_INT && right.Kind == ast.TYPE_LITERAL_INT
836+ bothUntyped := left.Kind == ast.TYPE_UNTYPED && right.Kind == ast.TYPE_UNTYPED
837+
838+ if lType.Kind == ast.TYPE_LITERAL_INT && tc.isIntegerType(rType) {
839+ if tc.cfg.IsFeatureEnabled(config.FeatPromTypes) && !bothUntypedInt {
840+ util.Warn(tc.cfg, config.WarnDebugComp, tok, "Untyped literal promoted from '%s' to '%s'", ast.TypeToString(left), ast.TypeToString(rType))
841 }
842+ lType = rType
843 leftNode.Typ = lType
844 }
845- if rType.Kind == ast.TYPE_UNTYPED_INT && tc.isIntegerType(lType) {
846- if tc.getSizeof(lType) < tc.getSizeof(ast.TypeInt) {
847- lType, rType = ast.TypeInt, ast.TypeInt
848- if leftNode.Typ.Kind != ast.TYPE_UNTYPED_INT { leftNode.Typ = ast.TypeInt }
849- } else {
850- rType = lType
851+ if rType.Kind == ast.TYPE_LITERAL_INT && tc.isIntegerType(lType) {
852+ if tc.cfg.IsFeatureEnabled(config.FeatPromTypes) && !bothUntypedInt {
853+ util.Warn(tc.cfg, config.WarnDebugComp, tok, "Untyped literal promoted from '%s' to '%s'", ast.TypeToString(right), ast.TypeToString(lType))
854 }
855+ rType = lType
856 rightNode.Typ = rType
857 }
858
859- if lType.Kind == ast.TYPE_UNTYPED_FLOAT && tc.isFloatType(rType) {
860+ if lType.Kind == ast.TYPE_LITERAL_FLOAT && tc.isFloatType(rType) {
861+ if tc.cfg.IsFeatureEnabled(config.FeatPromTypes) {
862+ util.Warn(tc.cfg, config.WarnDebugComp, tok, "Untyped literal promoted from '%s' to '%s'", ast.TypeToString(left), ast.TypeToString(rType))
863+ }
864 lType = rType
865 leftNode.Typ = rType
866 }
867- if rType.Kind == ast.TYPE_UNTYPED_FLOAT && tc.isFloatType(lType) {
868+ if rType.Kind == ast.TYPE_LITERAL_FLOAT && tc.isFloatType(lType) {
869+ if tc.cfg.IsFeatureEnabled(config.FeatPromTypes) {
870+ util.Warn(tc.cfg, config.WarnDebugComp, tok, "Untyped literal promoted from '%s' to '%s'", ast.TypeToString(right), ast.TypeToString(lType))
871+ }
872 rType = lType
873 rightNode.Typ = rType
874 }
875
876+ // Handle operations between two untyped operands
877+ if bothUntyped {
878+ // Check if both operands are literals
879+ leftIsLiteral := leftNode != nil && (leftNode.Type == ast.Number || leftNode.Type == ast.FloatNumber)
880+ rightIsLiteral := rightNode != nil && (rightNode.Type == ast.Number || rightNode.Type == ast.FloatNumber)
881+
882+ // For untyped + untyped, emit warning
883+ if leftIsLiteral && rightIsLiteral {
884+ // Both are literals, use DebugComp warning
885+ if tc.cfg.IsWarningEnabled(config.WarnDebugComp) {
886+ util.Warn(tc.cfg, config.WarnDebugComp, tok, "Operation between untyped operands")
887+ }
888+ } else {
889+ // At least one is not a literal, use prom-types warning
890+ if tc.cfg.IsWarningEnabled(config.WarnPromTypes) {
891+ util.Warn(tc.cfg, config.WarnPromTypes, tok, "Operation between untyped operands")
892+ }
893+ }
894+ // Default to int type for untyped operations
895+ lType = ast.TypeInt
896+ rType = ast.TypeInt
897+ leftNode.Typ = ast.TypeInt
898+ rightNode.Typ = ast.TypeInt
899+ }
900+
901+ // Handle operations between untyped and typed operands
902+ if lType.Kind == ast.TYPE_UNTYPED && tc.isIntegerType(rType) {
903+ if tc.cfg.IsFeatureEnabled(config.FeatPromTypes) || !bothUntyped {
904+ // Check if the operand is a literal
905+ leftIsLiteral := leftNode != nil && (leftNode.Type == ast.Number || leftNode.Type == ast.FloatNumber)
906+
907+ if leftIsLiteral {
908+ // Literal promotion, use DebugComp warning
909+ if tc.cfg.IsWarningEnabled(config.WarnDebugComp) {
910+ util.Warn(tc.cfg, config.WarnDebugComp, tok, "Untyped operand promoted to '%s'", ast.TypeToString(rType))
911+ }
912+ } else {
913+ // Variable promotion, use prom-types warning
914+ if tc.cfg.IsWarningEnabled(config.WarnPromTypes) {
915+ util.Warn(tc.cfg, config.WarnPromTypes, tok, "Untyped operand promoted to '%s'", ast.TypeToString(rType))
916+ }
917+ }
918+ }
919+ lType = rType
920+ leftNode.Typ = rType
921+ }
922+ if rType.Kind == ast.TYPE_UNTYPED && tc.isIntegerType(lType) {
923+ if tc.cfg.IsFeatureEnabled(config.FeatPromTypes) || !bothUntyped {
924+ // Check if the operand is a literal
925+ rightIsLiteral := rightNode != nil && (rightNode.Type == ast.Number || rightNode.Type == ast.FloatNumber)
926+
927+ if rightIsLiteral {
928+ // Literal promotion, use DebugComp warning
929+ if tc.cfg.IsWarningEnabled(config.WarnDebugComp) {
930+ util.Warn(tc.cfg, config.WarnDebugComp, tok, "Untyped operand promoted to '%s'", ast.TypeToString(lType))
931+ }
932+ } else {
933+ // Variable promotion, use prom-types warning
934+ if tc.cfg.IsWarningEnabled(config.WarnPromTypes) {
935+ util.Warn(tc.cfg, config.WarnPromTypes, tok, "Untyped operand promoted to '%s'", ast.TypeToString(lType))
936+ }
937+ }
938+ }
939+ rType = lType
940+ rightNode.Typ = lType
941+ }
942+
943 resLeft, resRight = lType, rType
944
945- if op >= token.EqEq && op <= token.OrOr { return ast.TypeInt }
946+ if op >= token.EqEq && op <= token.OrOr {
947+ return ast.TypeInt
948+ }
949
950 if tc.isNumericType(resLeft) && tc.isNumericType(resRight) {
951- if tc.isFloatType(resLeft) || tc.isFloatType(resRight) { return ast.TypeFloat }
952- if tc.getSizeof(resLeft) > tc.getSizeof(resRight) { return resLeft }
953+ if tc.isFloatType(resLeft) || tc.isFloatType(resRight) {
954+ // If both operands are float types, return the more precise one
955+ if tc.isFloatType(resLeft) && tc.isFloatType(resRight) {
956+ if tc.areTypesEqual(resLeft, resRight) {
957+ return resLeft // Both same float type, preserve it
958+ }
959+ // Different float types, promote to the larger one
960+ if tc.getSizeof(resLeft) > tc.getSizeof(resRight) {
961+ return resLeft
962+ }
963+ return resRight
964+ }
965+ // One float, one integer - return the float type (not machine float)
966+ if tc.isFloatType(resLeft) {
967+ return resLeft
968+ }
969+ return resRight
970+ }
971+ if tc.getSizeof(resLeft) > tc.getSizeof(resRight) {
972+ return resLeft
973+ }
974 return resRight
975 }
976
977 if op == token.Plus || op == token.Minus {
978- if resLeft.Kind == ast.TYPE_POINTER && tc.isIntegerType(resRight) { return resLeft }
979- if tc.isIntegerType(resLeft) && resRight.Kind == ast.TYPE_POINTER && op == token.Plus { return resRight }
980- if op == token.Minus && resLeft.Kind == ast.TYPE_POINTER && resRight.Kind == ast.TYPE_POINTER { return ast.TypeInt }
981+ if resLeft.Kind == ast.TYPE_POINTER && tc.isIntegerType(resRight) {
982+ return resLeft
983+ }
984+ if tc.isIntegerType(resLeft) && resRight.Kind == ast.TYPE_POINTER && op == token.Plus {
985+ return resRight
986+ }
987+ if op == token.Minus && resLeft.Kind == ast.TYPE_POINTER && resRight.Kind == ast.TYPE_POINTER {
988+ return ast.TypeInt
989+ }
990+ // Allow string concatenation: byte* + byte* -> byte* (for Plus operation only)
991+ if op == token.Plus && resLeft.Kind == ast.TYPE_POINTER && resRight.Kind == ast.TYPE_POINTER &&
992+ resLeft.Base != nil && resRight.Base != nil &&
993+ resLeft.Base.Kind == ast.TYPE_PRIMITIVE && resLeft.Base.Name == "byte" &&
994+ resRight.Base.Kind == ast.TYPE_PRIMITIVE && resRight.Base.Name == "byte" {
995+ return resLeft
996+ }
997 }
998
999- tc.typeErrorOrWarn(tok, "Invalid binary operation between types '%s' and '%s'", typeToString(left), typeToString(right))
1000+ tc.typeErrorOrWarn(tok, "Invalid binary operation between types '%s' and '%s'", ast.TypeToString(left), ast.TypeToString(right))
1001 return ast.TypeInt
1002 }
1003
1004 func (tc *TypeChecker) areTypesCompatible(a, b *ast.BxType, bNode *ast.Node) bool {
1005- if a == nil || b == nil || a.Kind == ast.TYPE_UNTYPED { return true }
1006+ if a == nil || b == nil || a.Kind == ast.TYPE_UNTYPED {
1007+ return true
1008+ }
1009
1010- if b.Kind == ast.TYPE_UNTYPED_INT { return tc.isNumericType(a) || a.Kind == ast.TYPE_POINTER || a.Kind == ast.TYPE_BOOL }
1011- if b.Kind == ast.TYPE_UNTYPED_FLOAT { return tc.isFloatType(a) }
1012- if b.Kind == ast.TYPE_UNTYPED { return true }
1013+ if b.Kind == ast.TYPE_LITERAL_INT {
1014+ return tc.isNumericType(a) || a.Kind == ast.TYPE_POINTER || a.Kind == ast.TYPE_BOOL
1015+ }
1016+ if b.Kind == ast.TYPE_LITERAL_FLOAT {
1017+ return tc.isFloatType(a)
1018+ }
1019+ if b.Kind == ast.TYPE_UNTYPED {
1020+ return true
1021+ }
1022
1023 resA, resB := tc.resolveType(a), tc.resolveType(b)
1024
1025- if resA.Kind == ast.TYPE_POINTER && tc.isIntegerType(resB) { return true }
1026- if tc.isIntegerType(resA) && resB.Kind == ast.TYPE_POINTER { return true }
1027+ if resA.Kind == ast.TYPE_POINTER && tc.isIntegerType(resB) {
1028+ return true
1029+ }
1030+ if tc.isIntegerType(resA) && resB.Kind == ast.TYPE_POINTER {
1031+ return true
1032+ }
1033
1034- if resA.Kind == ast.TYPE_NIL { return resB.Kind == ast.TYPE_POINTER || resB.Kind == ast.TYPE_ARRAY || resB.Kind == ast.TYPE_NIL }
1035- if resB.Kind == ast.TYPE_NIL { return resA.Kind == ast.TYPE_POINTER || resA.Kind == ast.TYPE_ARRAY }
1036+ if resA.Kind == ast.TYPE_NIL {
1037+ return resB.Kind == ast.TYPE_POINTER || resB.Kind == ast.TYPE_ARRAY || resB.Kind == ast.TYPE_NIL
1038+ }
1039+ if resB.Kind == ast.TYPE_NIL {
1040+ return resA.Kind == ast.TYPE_POINTER || resA.Kind == ast.TYPE_ARRAY
1041+ }
1042
1043 if resA.Kind == resB.Kind {
1044 switch resA.Kind {
1045 case ast.TYPE_POINTER:
1046- if (resA.Base != nil && resA.Base.Kind == ast.TYPE_VOID) || (resB.Base != nil && resB.Base.Kind == ast.TYPE_VOID) { return true }
1047- if (resA.Base != nil && resA.Base == ast.TypeByte) || (resB.Base != nil && resB.Base == ast.TypeByte) { return true }
1048+ if (resA.Base != nil && resA.Base.Kind == ast.TYPE_VOID) || (resB.Base != nil && resB.Base.Kind == ast.TYPE_VOID) {
1049+ return true
1050+ }
1051+ if (resA.Base != nil && resA.Base == ast.TypeByte) || (resB.Base != nil && resB.Base == ast.TypeByte) {
1052+ return true
1053+ }
1054 return tc.areTypesCompatible(resA.Base, resB.Base, nil)
1055 case ast.TYPE_ARRAY:
1056 return tc.areTypesCompatible(resA.Base, resB.Base, nil)
1057@@ -891,18 +1284,32 @@ func (tc *TypeChecker) areTypesCompatible(a, b *ast.BxType, bNode *ast.Node) boo
1058 return true
1059 }
1060 }
1061- if bNode != nil && bNode.Type == ast.Number && bNode.Data.(ast.NumberNode).Value == 0 && resA.Kind == ast.TYPE_POINTER && tc.isIntegerType(resB) { return true }
1062- if resA.Kind == ast.TYPE_POINTER && resB.Kind == ast.TYPE_ARRAY { return tc.areTypesCompatible(resA.Base, resB.Base, nil) }
1063- if (resA.Kind == ast.TYPE_ENUM && tc.isIntegerType(resB)) || (tc.isIntegerType(resA) && resB.Kind == ast.TYPE_ENUM) { return true }
1064- if tc.isNumericType(resA) && tc.isNumericType(resB) { return true }
1065- if (resA.Kind == ast.TYPE_BOOL && tc.isScalarType(resB)) || (tc.isScalarType(resA) && resB.Kind == ast.TYPE_BOOL) { return true }
1066+ if bNode != nil && bNode.Type == ast.Number && bNode.Data.(ast.NumberNode).Value == 0 && resA.Kind == ast.TYPE_POINTER && tc.isIntegerType(resB) {
1067+ return true
1068+ }
1069+ if resA.Kind == ast.TYPE_POINTER && resB.Kind == ast.TYPE_ARRAY {
1070+ return tc.areTypesCompatible(resA.Base, resB.Base, nil)
1071+ }
1072+ if (resA.Kind == ast.TYPE_ENUM && tc.isIntegerType(resB)) || (tc.isIntegerType(resA) && resB.Kind == ast.TYPE_ENUM) {
1073+ return true
1074+ }
1075+ if tc.isNumericType(resA) && tc.isNumericType(resB) {
1076+ return true
1077+ }
1078+ if (resA.Kind == ast.TYPE_BOOL && tc.isScalarType(resB)) || (tc.isScalarType(resA) && resB.Kind == ast.TYPE_BOOL) {
1079+ return true
1080+ }
1081 return false
1082 }
1083
1084 func (tc *TypeChecker) areTypesEqual(a, b *ast.BxType) bool {
1085- if a == nil || b == nil { return a == b }
1086+ if a == nil || b == nil {
1087+ return a == b
1088+ }
1089 resA, resB := tc.resolveType(a), tc.resolveType(b)
1090- if resA.Kind != resB.Kind { return false }
1091+ if resA.Kind != resB.Kind {
1092+ return false
1093+ }
1094 switch resA.Kind {
1095 case ast.TYPE_POINTER, ast.TYPE_ARRAY:
1096 return tc.areTypesEqual(resA.Base, resB.Base)
1097@@ -935,13 +1342,13 @@ func (tc *TypeChecker) resolveType(typ *ast.BxType) *ast.BxType {
1098 func (tc *TypeChecker) isIntegerType(t *ast.BxType) bool {
1099 if t == nil { return false }
1100 resolved := tc.resolveType(t)
1101- return resolved.Kind == ast.TYPE_PRIMITIVE || resolved.Kind == ast.TYPE_UNTYPED_INT
1102+ return resolved.Kind == ast.TYPE_PRIMITIVE || resolved.Kind == ast.TYPE_LITERAL_INT || resolved.Kind == ast.TYPE_UNTYPED || resolved.Kind == ast.TYPE_ENUM
1103 }
1104
1105 func (tc *TypeChecker) isFloatType(t *ast.BxType) bool {
1106 if t == nil { return false }
1107 resolved := tc.resolveType(t)
1108- return resolved.Kind == ast.TYPE_FLOAT || resolved.Kind == ast.TYPE_UNTYPED_FLOAT
1109+ return resolved.Kind == ast.TYPE_FLOAT || resolved.Kind == ast.TYPE_LITERAL_FLOAT
1110 }
1111
1112 func (tc *TypeChecker) isNumericType(t *ast.BxType) bool {
1113@@ -951,45 +1358,4 @@ func (tc *TypeChecker) isScalarType(t *ast.BxType) bool {
1114 if t == nil { return false }
1115 resolved := tc.resolveType(t)
1116 return tc.isNumericType(resolved) || resolved.Kind == ast.TYPE_POINTER || resolved.Kind == ast.TYPE_BOOL
1117-}
1118-
1119-func typeToString(t *ast.BxType) string {
1120- if t == nil { return "<nil>" }
1121- var sb strings.Builder
1122- if t.IsConst { sb.WriteString("const ") }
1123- switch t.Kind {
1124- case ast.TYPE_PRIMITIVE, ast.TYPE_BOOL, ast.TYPE_FLOAT, ast.TYPE_UNTYPED_INT, ast.TYPE_UNTYPED_FLOAT:
1125- sb.WriteString(t.Name)
1126- case ast.TYPE_POINTER:
1127- sb.WriteString(typeToString(t.Base))
1128- sb.WriteString("*")
1129- case ast.TYPE_ARRAY:
1130- sb.WriteString("[]")
1131- sb.WriteString(typeToString(t.Base))
1132- case ast.TYPE_STRUCT:
1133- sb.WriteString("struct ")
1134- if t.Name != "" {
1135- sb.WriteString(t.Name)
1136- } else if t.StructTag != "" {
1137- sb.WriteString(t.StructTag)
1138- } else {
1139- sb.WriteString("<anonymous>")
1140- }
1141- case ast.TYPE_ENUM:
1142- sb.WriteString("enum ")
1143- if t.Name != "" {
1144- sb.WriteString(t.Name)
1145- } else {
1146- sb.WriteString("<anonymous>")
1147- }
1148- case ast.TYPE_VOID:
1149- sb.WriteString("void")
1150- case ast.TYPE_UNTYPED:
1151- sb.WriteString("untyped")
1152- case ast.TYPE_NIL:
1153- sb.WriteString("nil")
1154- default:
1155- sb.WriteString(fmt.Sprintf("<unknown_type_kind_%d>", t.Kind))
1156- }
1157- return sb.String()
1158-}
1159+}
+1,
-4
1@@ -20,10 +20,7 @@ const (
2 formatItalic = "\033[3m"
3 )
4
5-type SourceFileRecord struct {
6- Name string
7- Content []rune
8-}
9+type SourceFileRecord struct { Name string; Content []rune }
10
11 var sourceFiles []SourceFileRecord
12
+126,
-0
1@@ -0,0 +1,126 @@
2+{
3+ "binary_path": "/tmp/gtest-2081473483/5ebe214e4a711db7",
4+ "compile": {
5+ "stdout": "----------------------\nTokenizing 1 source file(s) (Typed Pass: true)...\nParsing tokens into AST...\nConstant folding...\nType checking...\nGenerating backend-agnostic IR...\nGenerating target code with 'qbe' backend...\nAssembling and linking to create '/tmp/gtest-2081473483/5ebe214e4a711db7'...\n----------------------\nCompilation successful!\n",
6+ "stderr": "gbc: info: no target specified, defaulting to host target 'amd64_sysv' for backend 'qbe'\ngbc: info: using backend 'qbe' with target 'amd64_sysv' (GOOS=linux, GOARCH=amd64)\n",
7+ "exitCode": 0,
8+ "duration": 26519162,
9+ "timed_out": false
10+ },
11+ "runs": [
12+ {
13+ "name": "fold",
14+ "args": [
15+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzAB\n"
16+ ],
17+ "result": {
18+ "stdout": "16\nYes, but only as of 29/08/2025\n",
19+ "stderr": "",
20+ "exitCode": 0,
21+ "duration": 452024,
22+ "timed_out": false
23+ }
24+ },
25+ {
26+ "name": "fold2",
27+ "args": [
28+ "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWX\n"
29+ ],
30+ "result": {
31+ "stdout": "16\nYes, but only as of 29/08/2025\n",
32+ "stderr": "",
33+ "exitCode": 0,
34+ "duration": 389159,
35+ "timed_out": false
36+ }
37+ },
38+ {
39+ "name": "hashTable",
40+ "args": [
41+ "s foo 10\ns bar 50\ng\ng foo\ng bar\np\nq\n"
42+ ],
43+ "result": {
44+ "stdout": "16\nYes, but only as of 29/08/2025\n",
45+ "stderr": "",
46+ "exitCode": 0,
47+ "duration": 343399,
48+ "timed_out": false
49+ }
50+ },
51+ {
52+ "name": "no_args",
53+ "result": {
54+ "stdout": "16\nYes, but only as of 29/08/2025\n",
55+ "stderr": "",
56+ "exitCode": 0,
57+ "duration": 316167,
58+ "timed_out": false
59+ }
60+ },
61+ {
62+ "name": "numeric_arg_0",
63+ "args": [
64+ "0"
65+ ],
66+ "result": {
67+ "stdout": "16\nYes, but only as of 29/08/2025\n",
68+ "stderr": "",
69+ "exitCode": 0,
70+ "duration": 310804,
71+ "timed_out": false
72+ }
73+ },
74+ {
75+ "name": "numeric_arg_neg",
76+ "args": [
77+ "-5"
78+ ],
79+ "result": {
80+ "stdout": "16\nYes, but only as of 29/08/2025\n",
81+ "stderr": "",
82+ "exitCode": 0,
83+ "duration": 307158,
84+ "timed_out": false
85+ }
86+ },
87+ {
88+ "name": "numeric_arg_pos",
89+ "args": [
90+ "5"
91+ ],
92+ "result": {
93+ "stdout": "16\nYes, but only as of 29/08/2025\n",
94+ "stderr": "",
95+ "exitCode": 0,
96+ "duration": 308048,
97+ "timed_out": false
98+ }
99+ },
100+ {
101+ "name": "quit",
102+ "args": [
103+ "q"
104+ ],
105+ "result": {
106+ "stdout": "16\nYes, but only as of 29/08/2025\n",
107+ "stderr": "",
108+ "exitCode": 0,
109+ "duration": 306906,
110+ "timed_out": false
111+ }
112+ },
113+ {
114+ "name": "string_arg",
115+ "args": [
116+ "test"
117+ ],
118+ "result": {
119+ "stdout": "16\nYes, but only as of 29/08/2025\n",
120+ "stderr": "",
121+ "exitCode": 0,
122+ "duration": 309619,
123+ "timed_out": false
124+ }
125+ }
126+ ]
127+}
+126,
-0
1@@ -0,0 +1,126 @@
2+{
3+ "binary_path": "/tmp/gtest-1387192597/1f380b8c92db7ece",
4+ "compile": {
5+ "stdout": "----------------------\nTokenizing 1 source file(s) (Typed Pass: true)...\nParsing tokens into AST...\nConstant folding...\nType checking...\nGenerating backend-agnostic IR...\nGenerating target code with 'qbe' backend...\nAssembling and linking to create '/tmp/gtest-1387192597/1f380b8c92db7ece'...\n----------------------\nCompilation successful!\n",
6+ "stderr": "gbc: info: no target specified, defaulting to host target 'amd64_sysv' for backend 'qbe'\ngbc: info: using backend 'qbe' with target 'amd64_sysv' (GOOS=linux, GOARCH=amd64)\n",
7+ "exitCode": 0,
8+ "duration": 30903338,
9+ "timed_out": false
10+ },
11+ "runs": [
12+ {
13+ "name": "fold",
14+ "args": [
15+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzAB\n"
16+ ],
17+ "result": {
18+ "stdout": "two or three\n",
19+ "stderr": "",
20+ "exitCode": 0,
21+ "duration": 333360,
22+ "timed_out": false
23+ }
24+ },
25+ {
26+ "name": "fold2",
27+ "args": [
28+ "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWX\n"
29+ ],
30+ "result": {
31+ "stdout": "two or three\n",
32+ "stderr": "",
33+ "exitCode": 0,
34+ "duration": 345843,
35+ "timed_out": false
36+ }
37+ },
38+ {
39+ "name": "hashTable",
40+ "args": [
41+ "s foo 10\ns bar 50\ng\ng foo\ng bar\np\nq\n"
42+ ],
43+ "result": {
44+ "stdout": "two or three\n",
45+ "stderr": "",
46+ "exitCode": 0,
47+ "duration": 333466,
48+ "timed_out": false
49+ }
50+ },
51+ {
52+ "name": "no_args",
53+ "result": {
54+ "stdout": "two or three\n",
55+ "stderr": "",
56+ "exitCode": 0,
57+ "duration": 385005,
58+ "timed_out": false
59+ }
60+ },
61+ {
62+ "name": "numeric_arg_0",
63+ "args": [
64+ "0"
65+ ],
66+ "result": {
67+ "stdout": "two or three\n",
68+ "stderr": "",
69+ "exitCode": 0,
70+ "duration": 391410,
71+ "timed_out": false
72+ }
73+ },
74+ {
75+ "name": "numeric_arg_neg",
76+ "args": [
77+ "-5"
78+ ],
79+ "result": {
80+ "stdout": "two or three\n",
81+ "stderr": "",
82+ "exitCode": 0,
83+ "duration": 405658,
84+ "timed_out": false
85+ }
86+ },
87+ {
88+ "name": "numeric_arg_pos",
89+ "args": [
90+ "5"
91+ ],
92+ "result": {
93+ "stdout": "two or three\n",
94+ "stderr": "",
95+ "exitCode": 0,
96+ "duration": 399602,
97+ "timed_out": false
98+ }
99+ },
100+ {
101+ "name": "quit",
102+ "args": [
103+ "q"
104+ ],
105+ "result": {
106+ "stdout": "two or three\n",
107+ "stderr": "",
108+ "exitCode": 0,
109+ "duration": 355614,
110+ "timed_out": false
111+ }
112+ },
113+ {
114+ "name": "string_arg",
115+ "args": [
116+ "test"
117+ ],
118+ "result": {
119+ "stdout": "two or three\n",
120+ "stderr": "",
121+ "exitCode": 0,
122+ "duration": 344214,
123+ "timed_out": false
124+ }
125+ }
126+ ]
127+}
+126,
-0
1@@ -0,0 +1,126 @@
2+{
3+ "binary_path": "/tmp/gtest-1458024948/bd7199957ef6b659",
4+ "compile": {
5+ "stdout": "----------------------\nTokenizing 1 source file(s) (Typed Pass: true)...\nParsing tokens into AST...\nConstant folding...\nType checking...\nGenerating backend-agnostic IR...\nGenerating target code with 'qbe' backend...\nAssembling and linking to create '/tmp/gtest-1458024948/bd7199957ef6b659'...\n----------------------\nCompilation successful!\n",
6+ "stderr": "gbc: info: no target specified, defaulting to host target 'amd64_sysv' for backend 'qbe'\ngbc: info: using backend 'qbe' with target 'amd64_sysv' (GOOS=linux, GOARCH=amd64)\n",
7+ "exitCode": 0,
8+ "duration": 23850949,
9+ "timed_out": false
10+ },
11+ "runs": [
12+ {
13+ "name": "fold",
14+ "args": [
15+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzAB\n"
16+ ],
17+ "result": {
18+ "stdout": "Function-style casts: a=42, int(a)=42, (int(a)+1)=43\nPointer casts work: value=42\nn1(int8) : 8\nn2(int16): 16\nn3(int32): 32\nn4(int64): 64\n",
19+ "stderr": "",
20+ "exitCode": 0,
21+ "duration": 328908,
22+ "timed_out": false
23+ }
24+ },
25+ {
26+ "name": "fold2",
27+ "args": [
28+ "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWX\n"
29+ ],
30+ "result": {
31+ "stdout": "Function-style casts: a=42, int(a)=42, (int(a)+1)=43\nPointer casts work: value=42\nn1(int8) : 8\nn2(int16): 16\nn3(int32): 32\nn4(int64): 64\n",
32+ "stderr": "",
33+ "exitCode": 0,
34+ "duration": 312441,
35+ "timed_out": false
36+ }
37+ },
38+ {
39+ "name": "hashTable",
40+ "args": [
41+ "s foo 10\ns bar 50\ng\ng foo\ng bar\np\nq\n"
42+ ],
43+ "result": {
44+ "stdout": "Function-style casts: a=42, int(a)=42, (int(a)+1)=43\nPointer casts work: value=42\nn1(int8) : 8\nn2(int16): 16\nn3(int32): 32\nn4(int64): 64\n",
45+ "stderr": "",
46+ "exitCode": 0,
47+ "duration": 334080,
48+ "timed_out": false
49+ }
50+ },
51+ {
52+ "name": "no_args",
53+ "result": {
54+ "stdout": "Function-style casts: a=42, int(a)=42, (int(a)+1)=43\nPointer casts work: value=42\nn1(int8) : 8\nn2(int16): 16\nn3(int32): 32\nn4(int64): 64\n",
55+ "stderr": "",
56+ "exitCode": 0,
57+ "duration": 629697,
58+ "timed_out": false
59+ }
60+ },
61+ {
62+ "name": "numeric_arg_0",
63+ "args": [
64+ "0"
65+ ],
66+ "result": {
67+ "stdout": "Function-style casts: a=42, int(a)=42, (int(a)+1)=43\nPointer casts work: value=42\nn1(int8) : 8\nn2(int16): 16\nn3(int32): 32\nn4(int64): 64\n",
68+ "stderr": "",
69+ "exitCode": 0,
70+ "duration": 332966,
71+ "timed_out": false
72+ }
73+ },
74+ {
75+ "name": "numeric_arg_neg",
76+ "args": [
77+ "-5"
78+ ],
79+ "result": {
80+ "stdout": "Function-style casts: a=42, int(a)=42, (int(a)+1)=43\nPointer casts work: value=42\nn1(int8) : 8\nn2(int16): 16\nn3(int32): 32\nn4(int64): 64\n",
81+ "stderr": "",
82+ "exitCode": 0,
83+ "duration": 494432,
84+ "timed_out": false
85+ }
86+ },
87+ {
88+ "name": "numeric_arg_pos",
89+ "args": [
90+ "5"
91+ ],
92+ "result": {
93+ "stdout": "Function-style casts: a=42, int(a)=42, (int(a)+1)=43\nPointer casts work: value=42\nn1(int8) : 8\nn2(int16): 16\nn3(int32): 32\nn4(int64): 64\n",
94+ "stderr": "",
95+ "exitCode": 0,
96+ "duration": 315225,
97+ "timed_out": false
98+ }
99+ },
100+ {
101+ "name": "quit",
102+ "args": [
103+ "q"
104+ ],
105+ "result": {
106+ "stdout": "Function-style casts: a=42, int(a)=42, (int(a)+1)=43\nPointer casts work: value=42\nn1(int8) : 8\nn2(int16): 16\nn3(int32): 32\nn4(int64): 64\n",
107+ "stderr": "",
108+ "exitCode": 0,
109+ "duration": 306996,
110+ "timed_out": false
111+ }
112+ },
113+ {
114+ "name": "string_arg",
115+ "args": [
116+ "test"
117+ ],
118+ "result": {
119+ "stdout": "Function-style casts: a=42, int(a)=42, (int(a)+1)=43\nPointer casts work: value=42\nn1(int8) : 8\nn2(int16): 16\nn3(int32): 32\nn4(int64): 64\n",
120+ "stderr": "",
121+ "exitCode": 0,
122+ "duration": 323001,
123+ "timed_out": false
124+ }
125+ }
126+ ]
127+}
+126,
-0
1@@ -0,0 +1,126 @@
2+{
3+ "binary_path": "/tmp/gtest-2060627087/97f10ee2b293435d",
4+ "compile": {
5+ "stdout": "----------------------\nTokenizing 1 source file(s) (Typed Pass: true)...\nParsing tokens into AST...\nConstant folding...\nType checking...\nGenerating backend-agnostic IR...\nGenerating target code with 'qbe' backend...\nAssembling and linking to create '/tmp/gtest-2060627087/97f10ee2b293435d'...\n----------------------\nCompilation successful!\n",
6+ "stderr": "gbc: info: no target specified, defaulting to host target 'amd64_sysv' for backend 'qbe'\ngbc: info: using backend 'qbe' with target 'amd64_sysv' (GOOS=linux, GOARCH=amd64)\n",
7+ "exitCode": 0,
8+ "duration": 26027393,
9+ "timed_out": false
10+ },
11+ "runs": [
12+ {
13+ "name": "fold",
14+ "args": [
15+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzAB\n"
16+ ],
17+ "result": {
18+ "stdout": "1\n2\n4\n5\n",
19+ "stderr": "",
20+ "exitCode": 0,
21+ "duration": 362409,
22+ "timed_out": false
23+ }
24+ },
25+ {
26+ "name": "fold2",
27+ "args": [
28+ "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWX\n"
29+ ],
30+ "result": {
31+ "stdout": "1\n2\n4\n5\n",
32+ "stderr": "",
33+ "exitCode": 0,
34+ "duration": 338640,
35+ "timed_out": false
36+ }
37+ },
38+ {
39+ "name": "hashTable",
40+ "args": [
41+ "s foo 10\ns bar 50\ng\ng foo\ng bar\np\nq\n"
42+ ],
43+ "result": {
44+ "stdout": "1\n2\n4\n5\n",
45+ "stderr": "",
46+ "exitCode": 0,
47+ "duration": 344379,
48+ "timed_out": false
49+ }
50+ },
51+ {
52+ "name": "no_args",
53+ "result": {
54+ "stdout": "1\n2\n4\n5\n",
55+ "stderr": "",
56+ "exitCode": 0,
57+ "duration": 368422,
58+ "timed_out": false
59+ }
60+ },
61+ {
62+ "name": "numeric_arg_0",
63+ "args": [
64+ "0"
65+ ],
66+ "result": {
67+ "stdout": "1\n2\n4\n5\n",
68+ "stderr": "",
69+ "exitCode": 0,
70+ "duration": 423987,
71+ "timed_out": false
72+ }
73+ },
74+ {
75+ "name": "numeric_arg_neg",
76+ "args": [
77+ "-5"
78+ ],
79+ "result": {
80+ "stdout": "1\n2\n4\n5\n",
81+ "stderr": "",
82+ "exitCode": 0,
83+ "duration": 362626,
84+ "timed_out": false
85+ }
86+ },
87+ {
88+ "name": "numeric_arg_pos",
89+ "args": [
90+ "5"
91+ ],
92+ "result": {
93+ "stdout": "1\n2\n4\n5\n",
94+ "stderr": "",
95+ "exitCode": 0,
96+ "duration": 333391,
97+ "timed_out": false
98+ }
99+ },
100+ {
101+ "name": "quit",
102+ "args": [
103+ "q"
104+ ],
105+ "result": {
106+ "stdout": "1\n2\n4\n5\n",
107+ "stderr": "",
108+ "exitCode": 0,
109+ "duration": 332685,
110+ "timed_out": false
111+ }
112+ },
113+ {
114+ "name": "string_arg",
115+ "args": [
116+ "test"
117+ ],
118+ "result": {
119+ "stdout": "1\n2\n4\n5\n",
120+ "stderr": "",
121+ "exitCode": 0,
122+ "duration": 329594,
123+ "timed_out": false
124+ }
125+ }
126+ ]
127+}
+126,
-0
1@@ -0,0 +1,126 @@
2+{
3+ "binary_path": "/tmp/gtest-2304730691/8c546ccb220f26ce",
4+ "compile": {
5+ "stdout": "----------------------\nTokenizing 1 source file(s) (Typed Pass: true)...\nParsing tokens into AST...\nConstant folding...\nType checking...\nGenerating backend-agnostic IR...\nGenerating target code with 'qbe' backend...\nAssembling and linking to create '/tmp/gtest-2304730691/8c546ccb220f26ce'...\n----------------------\nCompilation successful!\n",
6+ "stderr": "gbc: info: no target specified, defaulting to host target 'amd64_sysv' for backend 'qbe'\ngbc: info: using backend 'qbe' with target 'amd64_sysv' (GOOS=linux, GOARCH=amd64)\nedge.bx:12:13: \u001b[33mwarning\u001b[0m:\n \u001b[90m 11 | \u001b[0m auto o = Outer{inner: \u0026i};\n \u001b[1;90m 12 | \u001b[0m return (\u0026o);\n \u001b[1;90m-- | \u001b[0m\u001b[33m ^\u001b[0m \u001b[3mReturning address of local variable 'o' [-Wlocal-address]\u001b[0m \u001b[3m\u001b[90m(emitted from \u001b[1;90mtypeChecker.go\u001b[0m)\u001b[0m\u001b[0m\u001b[0m\n \u001b[90m 13 | \u001b[0m}\n\n",
7+ "exitCode": 0,
8+ "duration": 30136840,
9+ "timed_out": false
10+ },
11+ "runs": [
12+ {
13+ "name": "fold",
14+ "args": [
15+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzAB\n"
16+ ],
17+ "result": {
18+ "stdout": "42\n",
19+ "stderr": "",
20+ "exitCode": 0,
21+ "duration": 360917,
22+ "timed_out": false
23+ }
24+ },
25+ {
26+ "name": "fold2",
27+ "args": [
28+ "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWX\n"
29+ ],
30+ "result": {
31+ "stdout": "42\n",
32+ "stderr": "",
33+ "exitCode": 0,
34+ "duration": 474129,
35+ "timed_out": false
36+ }
37+ },
38+ {
39+ "name": "hashTable",
40+ "args": [
41+ "s foo 10\ns bar 50\ng\ng foo\ng bar\np\nq\n"
42+ ],
43+ "result": {
44+ "stdout": "42\n",
45+ "stderr": "",
46+ "exitCode": 0,
47+ "duration": 363644,
48+ "timed_out": false
49+ }
50+ },
51+ {
52+ "name": "no_args",
53+ "result": {
54+ "stdout": "42\n",
55+ "stderr": "",
56+ "exitCode": 0,
57+ "duration": 363372,
58+ "timed_out": false
59+ }
60+ },
61+ {
62+ "name": "numeric_arg_0",
63+ "args": [
64+ "0"
65+ ],
66+ "result": {
67+ "stdout": "42\n",
68+ "stderr": "",
69+ "exitCode": 0,
70+ "duration": 744252,
71+ "timed_out": false
72+ }
73+ },
74+ {
75+ "name": "numeric_arg_neg",
76+ "args": [
77+ "-5"
78+ ],
79+ "result": {
80+ "stdout": "42\n",
81+ "stderr": "",
82+ "exitCode": 0,
83+ "duration": 417811,
84+ "timed_out": false
85+ }
86+ },
87+ {
88+ "name": "numeric_arg_pos",
89+ "args": [
90+ "5"
91+ ],
92+ "result": {
93+ "stdout": "42\n",
94+ "stderr": "",
95+ "exitCode": 0,
96+ "duration": 452356,
97+ "timed_out": false
98+ }
99+ },
100+ {
101+ "name": "quit",
102+ "args": [
103+ "q"
104+ ],
105+ "result": {
106+ "stdout": "42\n",
107+ "stderr": "",
108+ "exitCode": 0,
109+ "duration": 370682,
110+ "timed_out": false
111+ }
112+ },
113+ {
114+ "name": "string_arg",
115+ "args": [
116+ "test"
117+ ],
118+ "result": {
119+ "stdout": "42\n",
120+ "stderr": "",
121+ "exitCode": 0,
122+ "duration": 367562,
123+ "timed_out": false
124+ }
125+ }
126+ ]
127+}
+126,
-0
1@@ -0,0 +1,126 @@
2+{
3+ "binary_path": "/tmp/gtest-3071180605/9d8ad77165eb1aae",
4+ "compile": {
5+ "stdout": "----------------------\nTokenizing 1 source file(s) (Typed Pass: true)...\nParsing tokens into AST...\nConstant folding...\nType checking...\nGenerating backend-agnostic IR...\nGenerating target code with 'qbe' backend...\nAssembling and linking to create '/tmp/gtest-3071180605/9d8ad77165eb1aae'...\n----------------------\nCompilation successful!\n",
6+ "stderr": "gbc: info: no target specified, defaulting to host target 'amd64_sysv' for backend 'qbe'\ngbc: info: using backend 'qbe' with target 'amd64_sysv' (GOOS=linux, GOARCH=amd64)\n",
7+ "exitCode": 0,
8+ "duration": 28132582,
9+ "timed_out": false
10+ },
11+ "runs": [
12+ {
13+ "name": "fold",
14+ "args": [
15+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzAB\n"
16+ ],
17+ "result": {
18+ "stdout": "--- Testing Integer Switch ---\nx is two or three\nInteger test passed.\n\n--- Testing Float Operations ---\nFloat multiplication successful: 3.140000 * 2.710000 = 8.509400\nFloat division successful: 3.140000 / 2.0 = 1.570000\n",
19+ "stderr": "",
20+ "exitCode": 0,
21+ "duration": 451734,
22+ "timed_out": false
23+ }
24+ },
25+ {
26+ "name": "fold2",
27+ "args": [
28+ "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWX\n"
29+ ],
30+ "result": {
31+ "stdout": "--- Testing Integer Switch ---\nx is two or three\nInteger test passed.\n\n--- Testing Float Operations ---\nFloat multiplication successful: 3.140000 * 2.710000 = 8.509400\nFloat division successful: 3.140000 / 2.0 = 1.570000\n",
32+ "stderr": "",
33+ "exitCode": 0,
34+ "duration": 425538,
35+ "timed_out": false
36+ }
37+ },
38+ {
39+ "name": "hashTable",
40+ "args": [
41+ "s foo 10\ns bar 50\ng\ng foo\ng bar\np\nq\n"
42+ ],
43+ "result": {
44+ "stdout": "--- Testing Integer Switch ---\nx is two or three\nInteger test passed.\n\n--- Testing Float Operations ---\nFloat multiplication successful: 3.140000 * 2.710000 = 8.509400\nFloat division successful: 3.140000 / 2.0 = 1.570000\n",
45+ "stderr": "",
46+ "exitCode": 0,
47+ "duration": 505366,
48+ "timed_out": false
49+ }
50+ },
51+ {
52+ "name": "no_args",
53+ "result": {
54+ "stdout": "--- Testing Integer Switch ---\nx is two or three\nInteger test passed.\n\n--- Testing Float Operations ---\nFloat multiplication successful: 3.140000 * 2.710000 = 8.509400\nFloat division successful: 3.140000 / 2.0 = 1.570000\n",
55+ "stderr": "",
56+ "exitCode": 0,
57+ "duration": 365692,
58+ "timed_out": false
59+ }
60+ },
61+ {
62+ "name": "numeric_arg_0",
63+ "args": [
64+ "0"
65+ ],
66+ "result": {
67+ "stdout": "--- Testing Integer Switch ---\nx is two or three\nInteger test passed.\n\n--- Testing Float Operations ---\nFloat multiplication successful: 3.140000 * 2.710000 = 8.509400\nFloat division successful: 3.140000 / 2.0 = 1.570000\n",
68+ "stderr": "",
69+ "exitCode": 0,
70+ "duration": 357893,
71+ "timed_out": false
72+ }
73+ },
74+ {
75+ "name": "numeric_arg_neg",
76+ "args": [
77+ "-5"
78+ ],
79+ "result": {
80+ "stdout": "--- Testing Integer Switch ---\nx is two or three\nInteger test passed.\n\n--- Testing Float Operations ---\nFloat multiplication successful: 3.140000 * 2.710000 = 8.509400\nFloat division successful: 3.140000 / 2.0 = 1.570000\n",
81+ "stderr": "",
82+ "exitCode": 0,
83+ "duration": 368369,
84+ "timed_out": false
85+ }
86+ },
87+ {
88+ "name": "numeric_arg_pos",
89+ "args": [
90+ "5"
91+ ],
92+ "result": {
93+ "stdout": "--- Testing Integer Switch ---\nx is two or three\nInteger test passed.\n\n--- Testing Float Operations ---\nFloat multiplication successful: 3.140000 * 2.710000 = 8.509400\nFloat division successful: 3.140000 / 2.0 = 1.570000\n",
94+ "stderr": "",
95+ "exitCode": 0,
96+ "duration": 379346,
97+ "timed_out": false
98+ }
99+ },
100+ {
101+ "name": "quit",
102+ "args": [
103+ "q"
104+ ],
105+ "result": {
106+ "stdout": "--- Testing Integer Switch ---\nx is two or three\nInteger test passed.\n\n--- Testing Float Operations ---\nFloat multiplication successful: 3.140000 * 2.710000 = 8.509400\nFloat division successful: 3.140000 / 2.0 = 1.570000\n",
107+ "stderr": "",
108+ "exitCode": 0,
109+ "duration": 367333,
110+ "timed_out": false
111+ }
112+ },
113+ {
114+ "name": "string_arg",
115+ "args": [
116+ "test"
117+ ],
118+ "result": {
119+ "stdout": "--- Testing Integer Switch ---\nx is two or three\nInteger test passed.\n\n--- Testing Float Operations ---\nFloat multiplication successful: 3.140000 * 2.710000 = 8.509400\nFloat division successful: 3.140000 / 2.0 = 1.570000\n",
120+ "stderr": "",
121+ "exitCode": 0,
122+ "duration": 377177,
123+ "timed_out": false
124+ }
125+ }
126+ ]
127+}
+126,
-0
1@@ -0,0 +1,126 @@
2+{
3+ "binary_path": "/tmp/gtest-3724198725/87256ca3ad2a629c",
4+ "compile": {
5+ "stdout": "----------------------\nTokenizing 1 source file(s) (Typed Pass: true)...\nParsing tokens into AST...\nConstant folding...\nType checking...\nGenerating backend-agnostic IR...\nGenerating target code with 'qbe' backend...\nAssembling and linking to create '/tmp/gtest-3724198725/87256ca3ad2a629c'...\n----------------------\nCompilation successful!\n",
6+ "stderr": "gbc: info: no target specified, defaulting to host target 'amd64_sysv' for backend 'qbe'\ngbc: info: using backend 'qbe' with target 'amd64_sysv' (GOOS=linux, GOARCH=amd64)\n",
7+ "exitCode": 0,
8+ "duration": 36315010,
9+ "timed_out": false
10+ },
11+ "runs": [
12+ {
13+ "name": "fold",
14+ "args": [
15+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzAB\n"
16+ ],
17+ "result": {
18+ "stdout": "p.x: 10\np.y: 20\np2.x: 2\np2.y: 4\nv.x: 16\nv.y: 32\nv2.x: 5\nv2.y: 10\ns.a: 0.100000\ns.b: 0.200000\ns.c: 0.300000\ns.z: 50\n",
19+ "stderr": "",
20+ "exitCode": 0,
21+ "duration": 480889,
22+ "timed_out": false
23+ }
24+ },
25+ {
26+ "name": "fold2",
27+ "args": [
28+ "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWX\n"
29+ ],
30+ "result": {
31+ "stdout": "p.x: 10\np.y: 20\np2.x: 2\np2.y: 4\nv.x: 16\nv.y: 32\nv2.x: 5\nv2.y: 10\ns.a: 0.100000\ns.b: 0.200000\ns.c: 0.300000\ns.z: 50\n",
32+ "stderr": "",
33+ "exitCode": 0,
34+ "duration": 730982,
35+ "timed_out": false
36+ }
37+ },
38+ {
39+ "name": "hashTable",
40+ "args": [
41+ "s foo 10\ns bar 50\ng\ng foo\ng bar\np\nq\n"
42+ ],
43+ "result": {
44+ "stdout": "p.x: 10\np.y: 20\np2.x: 2\np2.y: 4\nv.x: 16\nv.y: 32\nv2.x: 5\nv2.y: 10\ns.a: 0.100000\ns.b: 0.200000\ns.c: 0.300000\ns.z: 50\n",
45+ "stderr": "",
46+ "exitCode": 0,
47+ "duration": 371906,
48+ "timed_out": false
49+ }
50+ },
51+ {
52+ "name": "no_args",
53+ "result": {
54+ "stdout": "p.x: 10\np.y: 20\np2.x: 2\np2.y: 4\nv.x: 16\nv.y: 32\nv2.x: 5\nv2.y: 10\ns.a: 0.100000\ns.b: 0.200000\ns.c: 0.300000\ns.z: 50\n",
55+ "stderr": "",
56+ "exitCode": 0,
57+ "duration": 334177,
58+ "timed_out": false
59+ }
60+ },
61+ {
62+ "name": "numeric_arg_0",
63+ "args": [
64+ "0"
65+ ],
66+ "result": {
67+ "stdout": "p.x: 10\np.y: 20\np2.x: 2\np2.y: 4\nv.x: 16\nv.y: 32\nv2.x: 5\nv2.y: 10\ns.a: 0.100000\ns.b: 0.200000\ns.c: 0.300000\ns.z: 50\n",
68+ "stderr": "",
69+ "exitCode": 0,
70+ "duration": 355225,
71+ "timed_out": false
72+ }
73+ },
74+ {
75+ "name": "numeric_arg_neg",
76+ "args": [
77+ "-5"
78+ ],
79+ "result": {
80+ "stdout": "p.x: 10\np.y: 20\np2.x: 2\np2.y: 4\nv.x: 16\nv.y: 32\nv2.x: 5\nv2.y: 10\ns.a: 0.100000\ns.b: 0.200000\ns.c: 0.300000\ns.z: 50\n",
81+ "stderr": "",
82+ "exitCode": 0,
83+ "duration": 488458,
84+ "timed_out": false
85+ }
86+ },
87+ {
88+ "name": "numeric_arg_pos",
89+ "args": [
90+ "5"
91+ ],
92+ "result": {
93+ "stdout": "p.x: 10\np.y: 20\np2.x: 2\np2.y: 4\nv.x: 16\nv.y: 32\nv2.x: 5\nv2.y: 10\ns.a: 0.100000\ns.b: 0.200000\ns.c: 0.300000\ns.z: 50\n",
94+ "stderr": "",
95+ "exitCode": 0,
96+ "duration": 567712,
97+ "timed_out": false
98+ }
99+ },
100+ {
101+ "name": "quit",
102+ "args": [
103+ "q"
104+ ],
105+ "result": {
106+ "stdout": "p.x: 10\np.y: 20\np2.x: 2\np2.y: 4\nv.x: 16\nv.y: 32\nv2.x: 5\nv2.y: 10\ns.a: 0.100000\ns.b: 0.200000\ns.c: 0.300000\ns.z: 50\n",
107+ "stderr": "",
108+ "exitCode": 0,
109+ "duration": 346934,
110+ "timed_out": false
111+ }
112+ },
113+ {
114+ "name": "string_arg",
115+ "args": [
116+ "test"
117+ ],
118+ "result": {
119+ "stdout": "p.x: 10\np.y: 20\np2.x: 2\np2.y: 4\nv.x: 16\nv.y: 32\nv2.x: 5\nv2.y: 10\ns.a: 0.100000\ns.b: 0.200000\ns.c: 0.300000\ns.z: 50\n",
120+ "stderr": "",
121+ "exitCode": 0,
122+ "duration": 328997,
123+ "timed_out": false
124+ }
125+ }
126+ ]
127+}
+10,
-0
1@@ -0,0 +1,10 @@
2+{
3+ "compile": {
4+ "stdout": "----------------------\nTokenizing 1 source file(s) (Typed Pass: true)...\nParsing tokens into AST...\nConstant folding...\nType checking...\nGenerating backend-agnostic IR...\n",
5+ "stderr": "gbc: info: no target specified, defaulting to host target 'amd64_sysv' for backend 'qbe'\ngbc: info: using backend 'qbe' with target 'amd64_sysv' (GOOS=linux, GOARCH=amd64)\nunknownIdentifier.bx:3:5: \u001b[31merror\u001b[0m:\n \u001b[90m 2 | \u001b[0m a := 42;\n \u001b[1;90m 3 | \u001b[0m a(); // OK, OK, calling it...\n \u001b[1;90m- | \u001b[0m\u001b[31m ^\u001b[0m \u001b[3m'a' is a variable but is used as a function\u001b[0m \u001b[3m\u001b[90m(emitted from \u001b[1;90mcodegen_helpers.go\u001b[0m)\u001b[0m\u001b[0m\u001b[0m\n \u001b[90m 4 | \u001b[0m}\n\n",
6+ "exitCode": 1,
7+ "duration": 2462372,
8+ "timed_out": false
9+ },
10+ "runs": null
11+}
+19,
-0
1@@ -0,0 +1,19 @@
2+type int Age; // Age: named type declared over int; converted explicitly with Age(...) and int(...) in this file.
3+extrn printf; // printf is declared as an external name rather than defined in this file.
4+
5+asOf := "29/08/2025"; // File-scope string handed to printf's %s in main.
6+
7+Age getAge() { // Returns the literal 16 converted to the Age type.
8+ return (Age(16));
9+}
10+
11+main() { // Prints getAge() as an int, then a dated note when myAge / 16 == 1; returns 0.
12+ auto myAge = getAge();
13+ printf("%d\n", int(myAge)); // The Age value is converted with int(...) before being passed to %d.
14+
15+ if (myAge / 16 == 1) { // The division is applied to the Age value directly, with no conversion written.
16+ printf("Yes, but only as of %s\n", asOf);
17+ }
18+
19+ return(0);
20+}
+289,
-0
1@@ -0,0 +1,289 @@
2+extrn printf, malloc;
3+
4+type struct Point { // Record with two int coordinates and a byte* label.
5+ x int;
6+ y int;
7+ name byte*; // The demos in this file assign string literals to this field.
8+};
9+
10+type enum Color { // Four enumerators; demo_enum_arrays switches on them and prints them with %d.
11+ RED,
12+ GREEN,
13+ BLUE,
14+ YELLOW
15+};
16+
17+void demo_integer_arrays() { // Declares a 3-element array of each integer type, fills it with constants, and prints it.
18+ printf("\n-- Integer arrays --\n");
19+
20+ // Signed integer types
21+ int int_array[3];
22+ int8 int8_array[3];
23+ int16 int16_array[3];
24+ int32 int32_array[3];
25+ int64 int64_array[3];
26+
27+ // Unsigned integer types
28+ uint uint_array[3];
29+ uint8 uint8_array[3];
30+ uint16 uint16_array[3];
31+ uint32 uint32_array[3];
32+ uint64 uint64_array[3];
33+
34+ // Byte type (alias for uint8)
35+ byte byte_array[3];
36+
37+ // Initialize and display int array
38+ int_array[0] = -100;
39+ int_array[1] = 0;
40+ int_array[2] = 100;
41+ printf("int array: [%d, %d, %d]\n", int_array[0], int_array[1], int_array[2]);
42+
43+ // Initialize and display int8 array
44+ int8_array[0] = -50;
45+ int8_array[1] = 0;
46+ int8_array[2] = 50;
47+ printf("int8 array: [%d, %d, %d]\n", int8_array[0], int8_array[1], int8_array[2]);
48+
49+ // Initialize and display int16 array
50+ int16_array[0] = -1000;
51+ int16_array[1] = 0;
52+ int16_array[2] = 1000;
53+ printf("int16 array: [%d, %d, %d]\n", int16_array[0], int16_array[1], int16_array[2]);
54+
55+ // Initialize and display int32 array
56+ int32_array[0] = -100000;
57+ int32_array[1] = 0;
58+ int32_array[2] = 100000;
59+ printf("int32 array: [%d, %d, %d]\n", int32_array[0], int32_array[1], int32_array[2]);
60+
61+ // Initialize and display int64 array
62+ int64_array[0] = -1000000;
63+ int64_array[1] = 0;
64+ int64_array[2] = 1000000;
65+ printf("int64 array: [%ld, %ld, %ld]\n", int64_array[0], int64_array[1], int64_array[2]); // int64 elements are printed with %ld.
66+
67+ // Initialize and display uint array
68+ uint_array[0] = 10;
69+ uint_array[1] = 20;
70+ uint_array[2] = 30;
71+ printf("uint array: [%u, %u, %u]\n", uint_array[0], uint_array[1], uint_array[2]);
72+
73+ // Initialize and display uint8 array
74+ uint8_array[0] = 100;
75+ uint8_array[1] = 150;
76+ uint8_array[2] = 200;
77+ printf("uint8 array: [%u, %u, %u]\n", uint8_array[0], uint8_array[1], uint8_array[2]);
78+
79+ // Initialize and display uint16 array
80+ uint16_array[0] = 1000;
81+ uint16_array[1] = 2000;
82+ uint16_array[2] = 3000;
83+ printf("uint16 array: [%u, %u, %u]\n", uint16_array[0], uint16_array[1], uint16_array[2]);
84+
85+ // Initialize and display uint32 array
86+ uint32_array[0] = 100000;
87+ uint32_array[1] = 200000;
88+ uint32_array[2] = 300000;
89+ printf("uint32 array: [%u, %u, %u]\n", uint32_array[0], uint32_array[1], uint32_array[2]);
90+
91+ // Initialize and display uint64 array
92+ uint64_array[0] = 1000000;
93+ uint64_array[1] = 2000000;
94+ uint64_array[2] = 3000000;
95+ printf("uint64 array: [%lu, %lu, %lu]\n", uint64_array[0], uint64_array[1], uint64_array[2]); // uint64 elements are printed with %lu.
96+
97+ // Initialize and display byte array (as characters)
98+ byte_array[0] = 'A';
99+ byte_array[1] = 'B';
100+ byte_array[2] = 'C';
101+ printf("byte array: [%c, %c, %c]\n", byte_array[0], byte_array[1], byte_array[2]);
102+}
103+
104+void demo_float_arrays() { // Fills 3-element float, float32 and float64 arrays with constants and prints each one.
105+ printf("\n-- Float arrays --\n");
106+
107+ float float_array[3];
108+ float32 float32_array[3];
109+ float64 float64_array[3];
110+
111+ // Initialize and display float array
112+ float_array[0] = 1.1;
113+ float_array[1] = 2.2;
114+ float_array[2] = 3.3;
115+ printf("float array: [%.2f, %.2f, %.2f]\n", float_array[0], float_array[1], float_array[2]); // Two decimals.
116+
117+ // Initialize and display float32 array
118+ float32_array[0] = 1.25;
119+ float32_array[1] = 2.75;
120+ float32_array[2] = 3.125;
121+ printf("float32 array: [%.3f, %.3f, %.3f]\n", float32_array[0], float32_array[1], float32_array[2]); // Three decimals.
122+
123+ // Initialize and display float64 array
124+ float64_array[0] = 1.123456;
125+ float64_array[1] = 2.789012;
126+ float64_array[2] = 3.456789;
127+ printf("float64 array: [%.6f, %.6f, %.6f]\n", float64_array[0], float64_array[1], float64_array[2]); // Six decimals.
128+}
129+
130+void demo_bool_arrays() { // Fills a 4-element bool array with 1/0 and prints each element as "true" or "false".
131+ printf("\n-- Bool arrays --\n");
132+
133+ bool bool_array[4];
134+
135+ // Initialize boolean array
136+ bool_array[0] = 1; // true
137+ bool_array[1] = 0; // false
138+ bool_array[2] = 1; // Non-zero is true
139+ bool_array[3] = 0; // Zero is false
140+
141+ printf("bool array: [%s, %s, %s, %s]\n",
142+ bool_array[0] ? "true" : "false",
143+ bool_array[1] ? "true" : "false",
144+ bool_array[2] ? "true" : "false",
145+ bool_array[3] ? "true" : "false");
146+}
147+
148+void demo_pointer_arrays() { // Builds int*, byte* and void* arrays; prints the first two (the void* printf is commented out).
149+ printf("\n-- Pointer arrays --\n");
150+
151+ int values[3];
152+ int* int_ptr_array[3];
153+ byte* string_array[3];
154+ void* void_ptr_array[3];
155+
156+ // Initialize some values
157+ values[0] = 42;
158+ values[1] = 84;
159+ values[2] = 126;
160+
161+ // Initialize pointer array to point to values
162+ int_ptr_array[0] = &values[0];
163+ int_ptr_array[1] = &values[1];
164+ int_ptr_array[2] = &values[2];
165+
166+ printf("int* array dereferenced: [%d, %d, %d]\n",
167+ *int_ptr_array[0], *int_ptr_array[1], *int_ptr_array[2]);
168+
169+ // Initialize string array
170+ string_array[0] = "Hello";
171+ string_array[1] = "World";
172+ string_array[2] = "GBC";
173+
174+ printf("string array: [\"%s\", \"%s\", \"%s\"]\n",
175+ string_array[0], string_array[1], string_array[2]);
176+
177+ // Initialize void pointer array (can point to any type)
178+ void_ptr_array[0] = &values[0]; // points to int
179+ void_ptr_array[1] = string_array[0]; // points to string
180+ void_ptr_array[2] = int_ptr_array; // points to array
181+
182+ //printf("void* array addresses: [%p, %p, %p]\n",
183+ // void_ptr_array[0], void_ptr_array[1], void_ptr_array[2]);
184+}
185+
186+void demo_struct_arrays() { // Fills a 3-element Point array field by field and prints each entry in a while loop.
187+ printf("\n-- Struct arrays --\n");
188+
189+ Point point_array[3];
190+
191+ // Initialize struct array
192+ point_array[0].x = 10;
193+ point_array[0].y = 20;
194+ point_array[0].name = "Origin";
195+
196+ point_array[1].x = 100;
197+ point_array[1].y = 200;
198+ point_array[1].name = "Point A";
199+
200+ point_array[2].x = -50;
201+ point_array[2].y = 75;
202+ point_array[2].name = "Point B";
203+
204+ printf("Point array:\n");
205+ i := 0; // Loop counter introduced with := and advanced manually at the end of each pass.
206+ while (i < 3) {
207+ printf(" [%d]: (%d, %d) \"%s\"\n", i,
208+ point_array[i].x, point_array[i].y, point_array[i].name);
209+ i = i + 1;
210+ }
211+}
212+
213+void demo_enum_arrays() { // Stores each Color enumerator in an array, then prints its name and %d value per element.
214+ printf("\n-- Enum arrays --\n");
215+
216+ Color color_array[4];
217+
218+ // Initialize enum array
219+ color_array[0] = RED;
220+ color_array[1] = GREEN;
221+ color_array[2] = BLUE;
222+ color_array[3] = YELLOW;
223+
224+ printf("Color array:\n");
225+ i := 0;
226+ while (i < 4) {
227+ color_name := ""; // Starts empty; every switch arm below overwrites it.
228+ switch (color_array[i]) {
229+ case RED:
230+ color_name = "RED";
231+ break;
232+ case GREEN:
233+ color_name = "GREEN";
234+ break;
235+ case BLUE:
236+ color_name = "BLUE";
237+ break;
238+ case YELLOW:
239+ color_name = "YELLOW";
240+ break;
241+ default: // Final arm of the switch, written without a break.
242+ color_name = "UNKNOWN";
243+ }
244+ printf(" [%d]: %s (%d)\n", i, color_name, color_array[i]);
245+ i = i + 1;
246+ }
247+}
248+
249+void demo_struct_pointer_arrays() { // Allocates two Points with malloc, fills them through a Point* array, and prints them.
250+ printf("\n-- Struct pointer arrays (dynamic) --\n");
251+
252+ Point* point_ptr_array[2];
253+
254+ // Allocate structs dynamically
255+ point_ptr_array[0] = malloc(sizeof(Point)); // Result is used without a null check and is not freed in this function.
256+ point_ptr_array[1] = malloc(sizeof(Point));
257+
258+ // Initialize dynamically allocated structs
259+ point_ptr_array[0].x = 300; // Fields are reached with '.' directly on the Point* element.
260+ point_ptr_array[0].y = 400;
261+ point_ptr_array[0].name = "Dynamic A";
262+
263+ point_ptr_array[1].x = -150;
264+ point_ptr_array[1].y = 250;
265+ point_ptr_array[1].name = "Dynamic B";
266+
267+ printf("Dynamic Point* array:\n");
268+ i := 0;
269+ while (i < 2) {
270+ printf(" [%d]: (%d, %d) \"%s\"\n", i,
271+ point_ptr_array[i].x, point_ptr_array[i].y, point_ptr_array[i].name);
272+ i = i + 1;
273+ }
274+}
275+
276+int main() { // Prints a banner, runs the seven demo_* functions in order, prints "Done." and returns 0.
277+ printf("GBC array types test\n");
278+ printf("Quick run of arrays for core types.\n");
279+
280+ demo_integer_arrays();
281+ demo_float_arrays();
282+ demo_bool_arrays();
283+ demo_pointer_arrays();
284+ demo_struct_arrays();
285+ demo_enum_arrays();
286+ demo_struct_pointer_arrays();
287+
288+ printf("\nDone.\n");
289+ return (0);
290+}
+253,
-0
1@@ -0,0 +1,253 @@
2+#include <stdio.h>
3+#include <stdlib.h>
4+#include <stdint.h>
5+#include <stdbool.h>
6+
7+typedef struct Point { /* Two int coordinates plus an unsigned char* label. */
8+ int x;
9+ int y;
10+ unsigned char *name; /* The demos assign string literals here through an explicit cast. */
11+} Point;
12+
13+typedef enum Color { /* Four enumerators; demo_enum_arrays switches on them and prints them with %d. */
14+ RED,
15+ GREEN,
16+ BLUE,
17+ YELLOW
18+} Color;
19+
20+void demo_integer_arrays(void) { // Declares a 3-element array of each integer type, fills it with constants, and prints it.
21+ printf("\n-- Integer arrays --\n");
22+
23+ // Signed integer types
24+ int int_array[3];
25+ int8_t int8_array[3];
26+ int16_t int16_array[3];
27+ int32_t int32_array[3];
28+ int64_t int64_array[3];
29+
30+ // Unsigned integer types
31+ unsigned int uint_array[3];
32+ uint8_t uint8_array[3];
33+ uint16_t uint16_array[3];
34+ uint32_t uint32_array[3];
35+ uint64_t uint64_array[3];
36+
37+ // Byte type
38+ uint8_t byte_array[3];
39+
40+ int_array[0] = -100;
41+ int_array[1] = 0;
42+ int_array[2] = 100;
43+ printf("int array: [%d, %d, %d]\n", int_array[0], int_array[1], int_array[2]);
44+
45+ int8_array[0] = -50;
46+ int8_array[1] = 0;
47+ int8_array[2] = 50;
48+ printf("int8 array: [%d, %d, %d]\n", int8_array[0], int8_array[1], int8_array[2]);
49+
50+ int16_array[0] = -1000;
51+ int16_array[1] = 0;
52+ int16_array[2] = 1000;
53+ printf("int16 array: [%d, %d, %d]\n", int16_array[0], int16_array[1], int16_array[2]);
54+
55+ int32_array[0] = -100000;
56+ int32_array[1] = 0;
57+ int32_array[2] = 100000;
58+ printf("int32 array: [%d, %d, %d]\n", int32_array[0], int32_array[1], int32_array[2]);
59+
60+ int64_array[0] = -1000000;
61+ int64_array[1] = 0;
62+ int64_array[2] = 1000000;
63+ printf("int64 array: [%lld, %lld, %lld]\n", // Arguments are cast to long long to match %lld.
64+ (long long)int64_array[0], (long long)int64_array[1], (long long)int64_array[2]);
65+
66+ uint_array[0] = 10;
67+ uint_array[1] = 20;
68+ uint_array[2] = 30;
69+ printf("uint array: [%u, %u, %u]\n", uint_array[0], uint_array[1], uint_array[2]);
70+
71+ uint8_array[0] = 100;
72+ uint8_array[1] = 150;
73+ uint8_array[2] = 200;
74+ printf("uint8 array: [%u, %u, %u]\n", uint8_array[0], uint8_array[1], uint8_array[2]);
75+
76+ uint16_array[0] = 1000;
77+ uint16_array[1] = 2000;
78+ uint16_array[2] = 3000;
79+ printf("uint16 array: [%u, %u, %u]\n", uint16_array[0], uint16_array[1], uint16_array[2]);
80+
81+ uint32_array[0] = 100000;
82+ uint32_array[1] = 200000;
83+ uint32_array[2] = 300000;
84+ printf("uint32 array: [%u, %u, %u]\n", uint32_array[0], uint32_array[1], uint32_array[2]);
85+
86+ uint64_array[0] = 1000000;
87+ uint64_array[1] = 2000000;
88+ uint64_array[2] = 3000000;
89+ printf("uint64 array: [%llu, %llu, %llu]\n", // Arguments are cast to unsigned long long to match %llu.
90+ (unsigned long long)uint64_array[0],
91+ (unsigned long long)uint64_array[1],
92+ (unsigned long long)uint64_array[2]);
93+
94+ byte_array[0] = 'A';
95+ byte_array[1] = 'B';
96+ byte_array[2] = 'C';
97+ printf("byte array: [%c, %c, %c]\n", byte_array[0], byte_array[1], byte_array[2]);
98+}
99+
100+void demo_float_arrays(void) { // Fills three 3-element floating-point arrays with constants and prints each one.
101+ printf("\n-- Float arrays --\n");
102+
103+ float float_array[3];
104+ float float32_array[3]; // Declared as plain float, same element type as float_array.
105+ double float64_array[3];
106+
107+ float_array[0] = 1.1f;
108+ float_array[1] = 2.2f;
109+ float_array[2] = 3.3f;
110+ printf("float array: [%.2f, %.2f, %.2f]\n", float_array[0], float_array[1], float_array[2]);
111+
112+ float32_array[0] = 1.25f;
113+ float32_array[1] = 2.75f;
114+ float32_array[2] = 3.125f;
115+ printf("float32 array: [%.3f, %.3f, %.3f]\n", float32_array[0], float32_array[1], float32_array[2]);
116+
117+ float64_array[0] = 1.123456;
118+ float64_array[1] = 2.789012;
119+ float64_array[2] = 3.456789;
120+ printf("float64 array: [%.6f, %.6f, %.6f]\n", float64_array[0], float64_array[1], float64_array[2]);
121+}
122+
123+void demo_bool_arrays(void) { // Fills a 4-element bool array with true/false and prints each element as a word.
124+ printf("\n-- Bool arrays --\n");
125+
126+ bool bool_array[4];
127+ bool_array[0] = true;
128+ bool_array[1] = false;
129+ bool_array[2] = true;
130+ bool_array[3] = false;
131+
132+ printf("bool array: [%s, %s, %s, %s]\n",
133+ bool_array[0] ? "true" : "false",
134+ bool_array[1] ? "true" : "false",
135+ bool_array[2] ? "true" : "false",
136+ bool_array[3] ? "true" : "false");
137+}
138+
139+void demo_pointer_arrays(void) { // Builds int*, unsigned char* and void* arrays and prints the first two.
140+ printf("\n-- Pointer arrays --\n");
141+
142+ int values[3];
143+ int *int_ptr_array[3];
144+ unsigned char *string_array[3];
145+ void *void_ptr_array[3];
146+
147+ values[0] = 42;
148+ values[1] = 84;
149+ values[2] = 126;
150+
151+ int_ptr_array[0] = &values[0];
152+ int_ptr_array[1] = &values[1];
153+ int_ptr_array[2] = &values[2];
154+
155+ printf("int* array dereferenced: [%d, %d, %d]\n",
156+ *int_ptr_array[0], *int_ptr_array[1], *int_ptr_array[2]);
157+
158+ string_array[0] = (unsigned char *)"Hello"; // Literals are cast to match the unsigned char* element type.
159+ string_array[1] = (unsigned char *)"World";
160+ string_array[2] = (unsigned char *)"GBC";
161+
162+ printf("string array: [\"%s\", \"%s\", \"%s\"]\n",
163+ string_array[0], string_array[1], string_array[2]);
164+
165+ void_ptr_array[0] = &values[0]; // void_ptr_array is written here but never read in this function.
166+ void_ptr_array[1] = string_array[0];
167+ void_ptr_array[2] = int_ptr_array;
168+}
169+
// Fills a 3-element array of Point (type declared earlier in the file) through
// its x, y and name members, then prints each element in a while loop.
170+void demo_struct_arrays(void) {
171+ printf("\n-- Struct arrays --\n");
172+
173+ Point point_array[3];
174+
175+ point_array[0].x = 10; point_array[0].y = 20; point_array[0].name = (unsigned char *)"Origin";
176+ point_array[1].x = 100; point_array[1].y = 200; point_array[1].name = (unsigned char *)"Point A";
177+ point_array[2].x = -50; point_array[2].y = 75; point_array[2].name = (unsigned char *)"Point B";
178+
179+ printf("Point array:\n");
// Index-driven while loop over the three elements.
180+ int i = 0;
181+ while (i < 3) {
182+ printf(" [%d]: (%d, %d) \"%s\"\n", i,
183+ point_array[i].x, point_array[i].y, point_array[i].name);
184+ i = i + 1;
185+ }
186+}
187+
// Stores the four Color enumerators (type declared earlier in the file) in an
// array and prints each one as "name (numeric value)".
188+void demo_enum_arrays(void) {
189+ printf("\n-- Enum arrays --\n");
190+
191+ Color color_array[4];
192+ color_array[0] = RED;
193+ color_array[1] = GREEN;
194+ color_array[2] = BLUE;
195+ color_array[3] = YELLOW;
196+
197+ printf("Color array:\n");
198+ int i = 0;
199+ while (i < 4) {
// Translate the enumerator to its display name; anything else maps to "UNKNOWN".
200+ const char *color_name;
201+ switch (color_array[i]) {
202+ case RED: color_name = "RED"; break;
203+ case GREEN: color_name = "GREEN"; break;
204+ case BLUE: color_name = "BLUE"; break;
205+ case YELLOW: color_name = "YELLOW"; break;
// default is the last label, so the missing break does not fall through anywhere.
206+ default: color_name = "UNKNOWN";
207+ }
208+ printf(" [%d]: %s (%d)\n", i, color_name, color_array[i]);
209+ i = i + 1;
210+ }
211+}
212+
// Allocates two Point objects with malloc, stores them in an array of Point*,
// fills and prints them through the pointers, then frees both allocations.
213+void demo_struct_pointer_arrays(void) {
214+ printf("\n-- Struct pointer arrays (dynamic) --\n");
215+
216+ Point *point_ptr_array[2];
// NOTE(review): the malloc results are dereferenced below without a NULL check.
217+ point_ptr_array[0] = malloc(sizeof(Point));
218+ point_ptr_array[1] = malloc(sizeof(Point));
219+
220+ point_ptr_array[0]->x = 300;
221+ point_ptr_array[0]->y = 400;
222+ point_ptr_array[0]->name = (unsigned char *)"Dynamic A";
223+
224+ point_ptr_array[1]->x = -150;
225+ point_ptr_array[1]->y = 250;
226+ point_ptr_array[1]->name = (unsigned char *)"Dynamic B";
227+
228+ printf("Dynamic Point* array:\n");
229+ int i = 0;
230+ while (i < 2) {
231+ printf(" [%d]: (%d, %d) \"%s\"\n", i,
232+ point_ptr_array[i]->x, point_ptr_array[i]->y, point_ptr_array[i]->name);
233+ i = i + 1;
234+ }
235+
// Release both heap objects; the name members point at string literals and are not freed.
236+ free(point_ptr_array[0]);
237+ free(point_ptr_array[1]);
238+}
239+
// Entry point: prints a banner, runs every demo_* function in a fixed order
// and returns 0. demo_integer_arrays is defined earlier in the file.
240+int main(void) {
241+ printf("GBC array types test\n");
242+ printf("Quick run of arrays for core types.\n");
243+
244+ demo_integer_arrays();
245+ demo_float_arrays();
246+ demo_bool_arrays();
247+ demo_pointer_arrays();
248+ demo_struct_arrays();
249+ demo_enum_arrays();
250+ demo_struct_pointer_arrays();
251+
252+ printf("\nDone.\n");
253+ return 0;
254+}
+13,
-11
1@@ -1,15 +1,17 @@
2-// [b]: requires: -pedantic -std=Bx
3+extrn printf;
4
5-extrn puts;
// File-local puts: prints arg followed by a newline by delegating to printf.
6+puts(arg string){
7+ printf("%s\n", arg);
8+}
9+
10+// TODO: Copilot: Make this yield an error: Tried to assign to undeclared identifier. Did you intend to use := ?
11+// global_short_aaa = 10; // Commented out - this is expected to be an error case
12
13-// Test implicit global declaration
14-global_implicit = 10;
15+global_short := 10;
16
17-// Test explicit type-inferred global declaration
18-auto global_auto = 20;
19+auto global_auto := 20;
20
21-// Test multi-declaration global
22-auto global_multi_1, global_multi_2 = 30, 40;
23+auto global_multi_1, global_multi_2 := 30, 40;
24
25 main() {
26 auto local_auto;
27@@ -21,8 +23,8 @@ main() {
28 // Test multi-short declaration
29 local_multi_short_1, local_multi_short_2 := 70, 80;
30
31- if (global_implicit != 10) {
32- puts("FAIL: global_implicit");
33+ if (global_short != 10) {
34+ puts("FAIL: global_short");
35 return (1);
36 }
37 if (global_auto != 20) {
38@@ -46,6 +48,6 @@ main() {
39 return (1);
40 }
41
42- puts("PASS: All Bx declaration syntax seems to work.");
43+ puts("PASS: All Bx declaration syntax seems to work.\n");
44 return (0);
45 }
+16,
-0
1@@ -0,0 +1,16 @@
// Switch test in which every arm ends by calling exit(): x is set to 2, so the
// `case 2` arm prints "two" and calls exit(0); the other arms call exit(1).
// NOTE(review): only puts is declared extrn while exit is also called — confirm the dialect allows this.
2+main() {
3+ extrn puts;
4+ auto x; x = 2;
5+ switch (x) {
6+ case 1:
7+ puts("one");
8+ exit(1);
9+ case 2:
10+ puts("two");
11+ exit(0);
12+ default:
13+ puts("other");
14+ exit(1);
15+ }
16+ return(0);
17+}
+15,
-0
1@@ -0,0 +1,15 @@
// Switch test using a short declaration (x := 2) and a case label that lists
// two values (`case 2, 3`); each non-default arm ends with break.
2+main() {
3+ extrn puts;
4+ x := 2;
5+ switch (x) {
6+ case 1:
7+ puts("one");
8+ break;
9+ case 2, 3:
10+ puts("two or three");
11+ break;
12+ default:
13+ puts("other");
14+ }
15+ return(0);
16+}
+34,
-0
1@@ -0,0 +1,34 @@
2+// Test casting behavior - only function-style casts allowed for simple types
// Cast test: applies function-style casts (int(a), int8(..) .. int64(..)) to
// values and parenthesised pointer casts ((byte*), (int*)) to an address,
// printing each result, then terminates through exit(0).
3+main() {
4+ extrn printf, exit;
5+
6+ auto a = 42;
7+
8+ // testing some basic casting
9+ b := int(a);
10+ c := (int(a) + 1);
11+
12+ printf("Function-style casts: a=%d, int(a)=%d, (int(a)+1)=%d\n", a, b, c);
13+
14+ // testing pointer casting
// Round-trips &a through byte* and back to int* before dereferencing it.
15+ ptr := &a;
16+ byte_ptr := (byte*)ptr;
17+ int_ptr := (int*)byte_ptr;
18+ value := *int_ptr;
19+
20+ printf("Pointer casts work: value=%d\n", value);
21+
22+ // --- Others
23+ n1 := int8(8);
24+ n2 := int16(16);
25+ n3 := int32(32);
26+ n4 := int64(64);
27+ // -
28+ printf("n1(int8) : %d\n", n1);
29+ printf("n2(int16): %d\n", n2);
30+ printf("n3(int32): %d\n", n3);
31+ printf("n4(int64): %d\n", n4);
32+ // ---
33+
34+ exit(0);
35+}
+12,
-0
1@@ -0,0 +1,12 @@
// `continue` test: counts i from 1 to 5 and skips the printf for i == 3.
2+main() {
3+ extrn printf;
4+ auto i = 0;
5+ while (i < 5) {
// The increment comes before the continue, so skipping an iteration cannot stall the loop.
6+ i++;
7+ if (i == 3) {
8+ continue;
9+ }
10+ printf("%d\n", i); // 1, 2, 4, 5
11+ }
12+ return(0);
13+}
+21,
-0
1@@ -0,0 +1,21 @@
// Inner holds a single int; Outer refers to it through a pointer.
2+type struct Inner {
3+ value int;
4+};
5+
// Outer holds a pointer to an Inner, used to test chained member access.
6+type struct Outer {
7+ inner Inner*;
8+};
9+
// Builds an Inner holding val and an Outer pointing at it, and returns the
// address of the Outer.
// NOTE(review): both i and o are `auto` locals and their addresses leave this
// function; if auto variables live on the stack as in C, the returned pointer
// dangles — confirm the storage rules of the dialect.
10+Outer* createOuter(val int) {
11+ auto i = Inner{value: val};
12+ auto o = Outer{inner: &i};
13+ return (&o);
14+}
15+
// Calls createOuter(42) and prints the nested value through two chained
// member accesses on a pointer (outerPtr.inner.value).
16+main() {
17+ extrn printf;
18+
19+ auto outerPtr = createOuter(42);
20+ printf("%d\n", outerPtr.inner.value);
21+ return(0);
22+}
+16,
-0
1@@ -0,0 +1,16 @@
2+// Tests enums
// Three-member enum used by main below.
3+type enum Color {
4+ RED,
5+ GREEN,
6+ BLUE
7+};
8+
// Assigns the enumerator GREEN to an auto variable and prints a message when
// the variable compares equal to GREEN.
9+main() {
10+ extrn puts;
11+
12+ auto myColor = GREEN;
13+ if (myColor == GREEN) {
14+ puts("The color is green");
15+ }
16+ return(0);
17+}
+50,
-0
1@@ -0,0 +1,50 @@
2+extrn printf;
3+
// The three light states cycled through by get_next_light.
4+type enum TrafficLight {
5+ RED,
6+ YELLOW,
7+ GREEN
8+};
9+
// Returns the state that follows current_light: GREEN -> YELLOW,
// YELLOW -> RED, and anything else -> GREEN.
// The argument is cast with int(...) before being compared to an enumerator.
10+TrafficLight get_next_light(current_light TrafficLight) {
11+ if (int(current_light) == GREEN) {
12+ return (YELLOW);
13+ }
14+ if (int(current_light) == YELLOW) {
15+ return (RED);
16+ }
17+ return (GREEN);
18+}
19+
// Prints the action associated with light by switching directly on the enum
// value; values other than RED, YELLOW and GREEN print "Unknown light state."
20+void print_action(light TrafficLight) {
21+ switch (light) {
22+ case RED:
23+ printf("Light is RED. Action: Stop.\n");
24+ break;
25+ case YELLOW:
26+ printf("Light is YELLOW. Action: Caution.\n");
27+ break;
28+ case GREEN:
29+ printf("Light is GREEN. Action: Go.\n");
30+ break;
31+ default:
32+ printf("Unknown light state.\n");
33+ }
34+}
35+
// Starts at RED and prints the action for the current light four times,
// advancing with get_next_light between prints (RED, GREEN, YELLOW, RED).
36+int main() {
37+ auto current_light = RED;
38+
39+ print_action(current_light);
40+
41+ current_light = get_next_light(current_light);
42+ print_action(current_light);
43+
44+ current_light = get_next_light(current_light);
45+ print_action(current_light);
46+
47+ current_light = get_next_light(current_light);
48+ print_action(current_light);
49+
50+ return (0);
51+}
+57,
-0
1@@ -0,0 +1,57 @@
2+extrn printf;
3+
// Colors a Shape can have; print_shape uses the value as an index into a name table.
4+type enum Color {
5+ RED,
6+ GREEN,
7+ BLUE,
8+ YELLOW
9+};
10+
// A colored shape with an integer position.
11+type struct Shape {
12+ shape_color Color;
13+ x_pos int;
14+ y_pos int;
15+};
16+
// Creates a Shape from an empty struct literal, assigns its color and
// position member by member, and returns it by value.
17+Shape create_shape(c Color, x int, y int) {
18+ new_shape := Shape{};
19+ new_shape.shape_color = c;
20+ new_shape.x_pos = x;
21+ new_shape.y_pos = y;
22+ return (new_shape);
23+}
24+
// Prints the shape's color name and position. The color name is looked up by
// indexing a local table with s.shape_color.
// NOTE(review): assumes the enumerators are numbered 0..3 in declaration order — confirm.
25+void print_shape(s Shape) {
26+ color_names := []byte*{ "RED", "GREEN", "BLUE", "YELLOW" };
27+ printf("Shape -> Color: %s, Position: (%d, %d)\n",
28+ color_names[s.shape_color], s.x_pos, s.y_pos);
29+}
30+
// Moves the shape in place by (dx, dy). s is a pointer, and its members are
// accessed with `.` directly.
31+void move_shape(s Shape*, dx int, dy int) {
32+ s.x_pos = s.x_pos + dx;
33+ s.y_pos = s.y_pos + dy;
34+}
35+
// Returns the x position of a shape passed by value.
36+int get_shape_x(s Shape) {
37+ return (s.x_pos);
38+}
39+
// Creates a RED shape at (10, 20), moves it by (5, -5) through a pointer,
// advances its color to the next entry of a Color array, and prints the shape
// after every step followed by its final x position.
40+int main() {
41+ printf("Creating a RED shape...\n");
42+
43+ auto sp = create_shape(RED, 10, 20);
44+ print_shape(sp);
45+
46+ printf("\nMoving shape...\n");
47+ move_shape(&sp, 5, -5);
48+ print_shape(sp);
49+
50+ printf("\nChanging color...\n");
51+ colors := []Color{ RED, GREEN, BLUE, YELLOW };
// Next color = (current + 1) modulo the number of colors.
52+ sp.shape_color = colors[(int(sp.shape_color) + 1) % 4];
53+ print_shape(sp);
54+
55+ printf("\nThe final X position is: %d\n", get_shape_x(sp));
56+
57+ return (0);
58+}
+147,
-0
1@@ -0,0 +1,147 @@
2+extrn printf, sprintf;
3+
// Escape-sequence test driver. Each test formats a string literal containing
// escapes into `buffer` with sprintf (or uses character literals), checks
// individual bytes against their expected values, and prints [PASS]/[FAIL].
// `total` counts tests started and `passed` counts successes; main returns 0
// only when the two are equal.
4+main() {
5+ byte buffer[200];
6+ auto passed = 0;
7+ auto total = 0;
8+
9+ printf("=== Escape Sequence Parser Tests ===\n");
10+
11+ // Test 1: Basic escape sequences
12+ printf("\n--- Testing Basic Escape Sequences ---\n");
13+ total++;
14+ sprintf(buffer, "newline:\n tab:\t backslash:\\ quote:\" apostrophe:'");
// Indices 8, 14, 26, 34 and 47 are the positions of the five escaped characters in the literal.
15+ if (buffer[8] == '\n' && buffer[14] == '\t' && buffer[26] == '\\' && buffer[34] == '"' && buffer[47] == '\'') {
16+ printf("[PASS] Basic escape sequences work correctly\n");
17+ passed++;
18+ } else {
19+ printf("[FAIL] Basic escape sequences failed\n");
20+ }
21+
22+ // Test 2: Octal escape sequence \000 (NULL)
23+ total++;
24+ sprintf(buffer, "null:\000test");
25+ if (buffer[5] == 0) {
26+ printf("[PASS] Octal \\000 (NULL) escape sequence works\n");
27+ passed++;
28+ } else {
29+ printf("[FAIL] Octal \\000 escape sequence failed\n");
30+ }
31+
32+ // Test 3: Octal escape sequence \033 (ESC for ANSI)
33+ total++;
34+ sprintf(buffer, "esc:\033test");
35+ if (buffer[4] == 27) { // ASCII 27 = ESC
36+ printf("[PASS] Octal \\033 (ESC) escape sequence works\n");
37+ passed++;
38+ } else {
39+ printf("[FAIL] Octal \\033 escape sequence failed (got %d, expected 27)\n", buffer[4]);
40+ }
41+
42+ // Test 4: Octal escape sequence \101 (ASCII 'A')
43+ total++;
44+ sprintf(buffer, "A:\101test");
45+ if (buffer[2] == 65) { // ASCII 65 = 'A'
46+ printf("[PASS] Octal \\101 (ASCII A) escape sequence works\n");
47+ passed++;
48+ } else {
49+ printf("[FAIL] Octal \\101 escape sequence failed (got %d, expected 65)\n", buffer[2]);
50+ }
51+
52+ // Test 5: Octal escape sequence \377 (max value 255)
53+ // NOTE: This test currently fails due to UTF-8 encoding issues in string literals
54+ // The value 255 gets UTF-8 encoded as bytes 195,191 instead of a single byte 255
55+ total++;
56+ sprintf(buffer, "max:\377test");
57+ if (buffer[4] == 255) {
58+ printf("[PASS] Octal \\377 (255) escape sequence works\n");
59+ passed++;
60+ } else {
61+ printf("[FAIL] Octal \\377 escape sequence failed (got %d, expected 255) - UTF-8 encoding issue\n", buffer[4]);
62+ }
63+
64+ // Test 6: Single digit octal \007 (BEL) - Go-style 3 digits
65+ total++;
66+ sprintf(buffer, "bel:\007test");
67+ if (buffer[4] == 7) {
68+ printf("[PASS] Go-style octal \\007 (BEL) works\n");
69+ passed++;
70+ } else {
71+ printf("[FAIL] Go-style octal \\007 failed (got %d, expected 7)\n", buffer[4]);
72+ }
73+
74+ // Test 7: Two digit octal \012 (newline) - Go-style 3 digits
75+ total++;
76+ sprintf(buffer, "lf:\012test");
77+ if (buffer[3] == 10) { // ASCII 10 = LF
78+ printf("[PASS] Go-style octal \\012 (LF) works\n");
79+ passed++;
80+ } else {
81+ printf("[FAIL] Go-style octal \\012 failed (got %d, expected 10)\n", buffer[3]);
82+ }
83+
84+ // Test 8: ANSI color sequence (real world usage)
85+ total++;
86+ printf("\n--- Testing ANSI Color Sequences ---\n");
87+ sprintf(buffer, "\033[31mRED\033[0m");
88+ if (buffer[0] == 27 && buffer[1] == '[' && buffer[2] == '3' && buffer[3] == '1' && buffer[4] == 'm') {
89+ printf("[PASS] ANSI color sequence parsed correctly: ");
// buffer is passed as the format string itself; it contains no '%' here.
90+ printf(buffer); // This should display "RED" in red if terminal supports it
91+ printf("\n");
92+ passed++;
93+ } else {
94+ printf("[FAIL] ANSI color sequence failed\n");
95+ }
96+
97+ // Test 9: Mixed escape sequences
98+ total++;
99+ sprintf(buffer, "Line1\nLine2\t\033[32mGreen\033[0m\\\042End\042");
100+ if (buffer[5] == '\n' && buffer[11] == '\t' && buffer[12] == 27 && buffer[26] == '\\' && buffer[27] == '"') {
101+ printf("[PASS] Mixed escape sequences work correctly\n");
102+ passed++;
103+ } else {
104+ printf("[FAIL] Mixed escape sequences failed\n");
105+ }
106+
107+ // Test 10: Character literals with octal escapes - Go-style
108+ total++;
109+ auto esc_char = '\033';
110+ auto null_char = '\000';
111+ if (esc_char == 27 && null_char == 0) {
112+ printf("[PASS] Character literals with Go-style octal escapes work\n");
113+ passed++;
114+ } else {
115+ printf("[FAIL] Character literals with Go-style octal escapes failed\n");
116+ }
117+
118+ // Test 11: Go-style octal sequences (exactly 3 digits)
119+ total++;
// NOTE(review): if sprintf is the C library routine, it stops copying at the
// first NUL in the format, so buffer[3], buffer[5] and buffer[6] would still
// hold bytes written by the Test 9 sprintf — confirm this check can pass as intended.
120+ sprintf(buffer, "a\000b\000c\000\060d"); // \000 = null, \060 = '0'
121+ if (buffer[1] == 0 && buffer[3] == 0 && buffer[5] == 0 && buffer[6] == '0') {
122+ printf("[PASS] Go-style 3-digit octal sequences work\n");
123+ passed++;
124+ } else {
125+ printf("[FAIL] Go-style 3-digit octal sequences failed\n");
126+ }
127+
128+ // Test 12: Boundary octal values
129+ total++;
130+ sprintf(buffer, "\001\010\077\100\177"); // 1, 8, 63, 64, 127
131+ if (buffer[0] == 1 && buffer[1] == 8 && buffer[2] == 63 && buffer[3] == 64 && buffer[4] == 127) {
132+ printf("[PASS] Boundary octal values work correctly\n");
133+ passed++;
134+ } else {
135+ printf("[FAIL] Boundary octal values failed\n");
136+ }
137+
138+ // Final results
139+ printf("\n=== Test Results ===\n");
140+ printf("Passed: %d/%d tests\n", passed, total);
141+ if (passed == total) {
142+ printf("All escape sequence tests PASSED!\n");
143+ return (0);
144+ } else {
145+ printf("Some tests FAILED. Parser may have issues.\n");
146+ return (1);
147+ }
148+}
+15,
-0
1@@ -0,0 +1,15 @@
2+extrn printf;
3+
// Declares two float32 and two float variables, assigns and prints each pair,
// then prints x and y again after a and b have been declared and assigned.
4+int main() {
5+ float32 x, y;
6+ x = 1.25;
7+ y = 2.75;
8+ printf("x = %.3f, y = %.3f\n", x, y);
9+ float a, b;
10+ a = 8.50;
11+ b = 7.50;
12+ printf("a = %.3f, b = %.3f\n", a, b);
13+ // X and Y once again
14+ printf("x = %.3f, y = %.3f\n", x, y);
15+ return (0);
16+}
+23,
-0
1@@ -0,0 +1,23 @@
2+extrn printf;
3+
// float32 arithmetic test: prints three float32 variables before assignment,
// after assignment (z = x + y) and after x is doubled and y is halved.
// The first printf reads the variables before any assignment on purpose, as
// its own comment states.
4+int main() {
5+ float32 x, y, z;
6+
7+ // Try accessing them before any assignment to see uninitialized values
8+ printf("Initial: x = %.3f, y = %.3f, z = %.3f\n", x, y, z);
9+
10+ // Assign values
11+ x = 1.25;
12+ y = 2.75;
13+ z = x + y; // This should be 4.0
14+
15+ printf("After assignment: x = %.3f, y = %.3f, z = %.3f\n", x, y, z);
16+
17+ // Try some arithmetic
18+ x = x * 2.0; // Should be 2.5
19+ y = y / 2.0; // Should be 1.375
20+
// z is not recomputed, so it still holds the earlier sum.
21+ printf("After arithmetic: x = %.3f, y = %.3f, z = %.3f\n", x, y, z);
22+
23+ return (0);
24+}
+14,
-0
1@@ -0,0 +1,14 @@
2+extrn printf;
3+
// Prints the typeof() string of float32 variables, of the expression x + y and
// of the variable that receives that sum, then prints the sum itself.
4+int main() {
5+ float32 x, y, z;
6+ x = 1.25;
7+ y = 2.75;
8+ printf("x type: %s\n", typeof(x));
9+ printf("y type: %s\n", typeof(y));
10+ printf("x + y type: %s\n", typeof(x + y));
11+ z = x + y;
12+ printf("z type: %s\n", typeof(z));
13+ printf("z = %.3f\n", z);
14+ return (0);
15+}
+20,
-0
1@@ -0,0 +1,20 @@
2+extrn printf;
3+
// Prints float32 x and y before assignment, declares and prints two
// initialized float variables, then assigns x and y and prints them twice.
4+int main() {
5+ float32 x, y;
// x and y are read here before they have been assigned.
6+ printf("Before: x = %.3f, y = %.3f\n", x, y);
7+
8+ // Perform some float operations first
9+ float a = 8.50;
10+ float b = 7.50;
11+ printf("After float ops: a = %.3f, b = %.3f\n", a, b);
12+
13+ // Now assign to float32 variables
14+ x = 1.25;
15+ y = 2.75;
16+ printf("After assignment: x = %.3f, y = %.3f\n", x, y);
17+
18+ // Check if they maintain their values
19+ printf("Final check: x = %.3f, y = %.3f\n", x, y);
20+ return (0);
21+}
+10,
-0
1@@ -0,0 +1,10 @@
2+extrn printf;
3+
// Prints two float32 variables once before and once after assigning them.
4+int main() {
5+ float32 x, y; // Declare but do not initialize
6+ printf("Before assignment: x = %.3f, y = %.3f\n", x, y);
7+ x = 1.25;
8+ y = 2.75;
9+ printf("After assignment: x = %.3f, y = %.3f\n", x, y);
10+ return (0);
11+}
+16,
-0
1@@ -0,0 +1,16 @@
2+extrn printf;
3+
// Fills a float array and a float32 array using multi-assignment statements
// (two array elements on the left, two literals on the right) and prints each
// pair of elements with %g.
4+main() {
5+ float float_array[3];
6+ float32 float32_array[3];
7+
8+ float_array[0], float32_array[0] = 1.1, 1.25;
9+ float_array[1], float32_array[1] = 2.2, 2.75;
10+ float_array[2], float32_array[2] = 3.3, 3.125;
11+
12+ printf("float[0]: %g, float32[0]: %g\n", float_array[0], float32_array[0]);
13+ printf("float[1]: %g, float32[1]: %g\n", float_array[1], float32_array[1]);
14+ printf("float[2]: %g, float32[2]: %g\n", float_array[2], float32_array[2]);
15+
16+ return(0);
17+}
+21,
-0
1@@ -0,0 +1,21 @@
2+extrn printf;
3+
// Prints the results of arithmetic between operands of the same float type:
// +, * and / on two float32 values, and + on two float64 values.
4+int main() {
5+ // Test same float type combinations
6+ float32 x = 1.5;
7+ float32 y = 2.5;
8+
9+ printf("x (float32): %.3f\n", x);
10+ printf("y (float32): %.3f\n", y);
11+
12+ // Same type operations should preserve type
13+ printf("x + y (float32 + float32): %.3f\n", x + y); // Should be float32
14+ printf("x * y (float32 * float32): %.3f\n", x * y); // Should be float32
15+ printf("x / y (float32 / float32): %.3f\n", x / y); // Should be float32
16+
17+ float64 a = 3.5;
18+ float64 b = 4.5;
19+ printf("a + b (float64 + float64): %.3f\n", a + b); // Should be float64
20+
21+ return (0);
22+}
+49,
-0
1@@ -0,0 +1,49 @@
2+// Tests case statements with integers and floats, using extrn sprintf and printf.
3+
4+extrn sprintf, printf;
5+
// Two-part test: an integer switch with a multi-value case label, followed by
// float multiplication and division whose results are range-checked with
// if/else and reported through sprintf into `buffer` plus printf.
6+main() {
7+ auto buffer 100;
8+ auto x = 2;
9+ auto y = 3.14;
10+ auto z = 2.71;
11+
12+ // --- Integer Test ---
13+ printf("--- Testing Integer Switch ---\n");
14+ switch (x) {
15+ case 1:
16+ printf("x is one\n");
17+ break;
18+ case 2, 3:
19+ printf("x is two or three\n");
20+ break;
21+ default:
22+ printf("x is something else\n");
23+ }
24+ printf("Integer test passed.\n\n");
25+
26+ // --- Float Test ---
27+ printf("--- Testing Float Operations ---\n");
28+ auto result = y * z; // 3.14 * 2.71 = 8.5094
29+
30+ // We can't switch on floats, so we'll use if/else to check the result
31+ // and sprintf/printf to verify the value.
32+ if (result > 8.5 && result < 8.51) {
33+ sprintf(buffer, "Float multiplication successful: %f * %f = %f\n", y, z, result);
// buffer is used directly as the printf format; the formatted text contains no '%'.
34+ printf(buffer);
35+ } else {
36+ sprintf(buffer, "Float multiplication failed: %f * %f = %f\n", y, z, result);
37+ printf(buffer);
38+ }
39+
40+ auto div_res = y / 2.0; // 3.14 / 2.0 = 1.57
41+ if (div_res > 1.56 && div_res < 1.58) {
42+ sprintf(buffer, "Float division successful: %f / 2.0 = %f\n", y, div_res);
43+ printf(buffer);
44+ } else {
45+ sprintf(buffer, "Float division failed: %f / 2.0 = %f\n", y, div_res);
46+ printf(buffer);
47+ }
48+
49+ return(0);
50+}
+47,
-0
1@@ -0,0 +1,47 @@
2+#include <stdio.h>
3+
// C program with two parts: an integer switch in which cases 2 and 3 share one
// arm, followed by double multiplication and division whose results are
// range-checked and reported through sprintf into `buffer` and
// printf("%s", buffer).
4+int main(void) {
5+ char buffer[100];
6+ int x = 2;
7+ double y = 3.14;
8+ double z = 2.71;
9+
10+ // --- Integer Test ---
11+ printf("--- Testing Integer Switch ---\n");
12+ switch (x) {
13+ case 1:
14+ printf("x is one\n");
15+ break;
// case 2 falls through to case 3 so both values share one arm.
16+ case 2:
17+ case 3:
18+ printf("x is two or three\n");
19+ break;
20+ default:
21+ printf("x is something else\n");
22+ break;
23+ }
24+ printf("Integer test passed.\n\n");
25+
26+ // --- Float Test ---
27+ printf("--- Testing Float Operations ---\n");
28+ double result = y * z; // 3.14 * 2.71 = 8.5094
29+
30+ if (result > 8.5 && result < 8.51) {
31+ sprintf(buffer, "Float multiplication successful: %f * %f = %f\n", y, z, result);
32+ printf("%s", buffer);
33+ } else {
34+ sprintf(buffer, "Float multiplication failed: %f * %f = %f\n", y, z, result);
35+ printf("%s", buffer);
36+ }
37+
38+ double div_res = y / 2.0; // 3.14 / 2.0 = 1.57
39+ if (div_res > 1.56 && div_res < 1.58) {
40+ sprintf(buffer, "Float division successful: %f / 2.0 = %f\n", y, div_res);
41+ printf("%s", buffer);
42+ } else {
43+ sprintf(buffer, "Float division failed: %f / 2.0 = %f\n", y, div_res);
44+ printf("%s", buffer);
45+ }
46+
47+ return 0;
48+}
+8,
-0
1@@ -0,0 +1,8 @@
2+extrn printf;
// Prints three float literals and their typeof() strings, first as bare
// literals and then wrapped in float32(...) casts. main has no return statement.
3+main() {
4+ printf("[%.3f, %.3f, %.3f]\n", 1.25, 2.75, 3.125);
5+ printf("[%s, %s, %s]\n", typeof(1.25), typeof(2.75), typeof(3.125));
6+ printf("[float32(%.3f), float32(%.3f), float32(%.3f)]\n", float32(1.25), float32(2.75), float32(3.125));
7+ printf("[%s, %s, %s]\n", typeof(float32(1.25)), typeof(float32(2.75)), typeof(float32(3.125)));
8+}
9+
+43,
-0
1@@ -0,0 +1,43 @@
2+// Test multi-assignment functionality
// Prints "<message>: " followed by OK when actual == expected; otherwise
// prints FAIL and calls abort().
3+assert_equal(actual, expected, message) {
4+ extrn printf, abort;
5+ printf("%s: ", message);
6+ if (actual != expected) {
7+ printf("FAIL\n");
8+ abort();
9+ } else {
10+ printf("OK\n");
11+ }
12+}
14+main() {
15+ extrn assert_equal;
16+
17+ // Test 1: Simple two-variable assignment
18+ auto a, b;
19+ a, b = 10, 20;
20+ assert_equal(a, 10, "a, b = 10, 20; a == 10");
21+ assert_equal(b, 20, "a, b = 10, 20; b == 20");
22+
23+ // Test 2: Three-variable assignment
24+ auto x, y, z;
25+ x, y, z = 100, 200, 300;
26+ assert_equal(x, 100, "x, y, z = 100, 200, 300; x == 100");
27+ assert_equal(y, 200, "x, y, z = 100, 200, 300; y == 200");
28+ assert_equal(z, 300, "x, y, z = 100, 200, 300; z == 300");
29+
30+ // Test 3: Array element assignment
31+ auto arr1 3, arr2 3;
32+ arr1[0], arr2[0] = 50, 60;
33+ assert_equal(arr1[0], 50, "arr1[0], arr2[0] = 50, 60; arr1[0] == 50");
34+ assert_equal(arr2[0], 60, "arr1[0], arr2[0] = 50, 60; arr2[0] == 60");
35+
36+ // Test 4: Mixed lvalue types
37+ auto p, q, mixed 2;
38+ p, mixed[1], q = 1000, 2000, 3000;
39+ assert_equal(p, 1000, "p, mixed[1], q = 1000, 2000, 3000; p == 1000");
40+ assert_equal(mixed[1], 2000, "p, mixed[1], q = 1000, 2000, 3000; mixed[1] == 2000");
41+ assert_equal(q, 3000, "p, mixed[1], q = 1000, 2000, 3000; q == 3000");
42+
43+ return(0);
44+}
+32,
-0
1@@ -0,0 +1,32 @@
2+// Test multi-assignment functionality with Bx typed syntax
// Prints "<message>: " followed by OK when actual == expected; otherwise
// prints FAIL and calls abort().
3+assert_equal(actual, expected, message) {
4+ extrn printf, abort;
5+ printf("%s: ", message);
6+ if (actual != expected) {
7+ printf("FAIL\n");
8+ abort();
9+ } else {
10+ printf("OK\n");
11+ }
12+}
14+main() {
15+ extrn assert_equal;
16+
17+ // Test with typed variables and arrays (Bx style)
18+ int int_array[3];
19+ int other_array[3];
20+
21+ int_array[0], other_array[0] = 42, 84;
22+ int_array[1], other_array[1] = 123, 246;
23+ int_array[2], other_array[2] = 999, 1998;
24+
25+ assert_equal(int_array[0], 42, "int_array[0], other_array[0] = 42, 84; int_array[0] == 42");
26+ assert_equal(other_array[0], 84, "int_array[0], other_array[0] = 42, 84; other_array[0] == 84");
27+ assert_equal(int_array[1], 123, "int_array[1], other_array[1] = 123, 246; int_array[1] == 123");
28+ assert_equal(other_array[1], 246, "int_array[1], other_array[1] = 123, 246; other_array[1] == 246");
29+ assert_equal(int_array[2], 999, "int_array[2], other_array[2] = 999, 1998; int_array[2] == 999");
30+ assert_equal(other_array[2], 1998, "int_array[2], other_array[2] = 999, 1998; other_array[2] == 1998");
31+
32+ return(0);
33+}
+8,
-0
1@@ -0,0 +1,8 @@
2+extrn printf;
3+
// Minimal float32 test: declares one variable, assigns 1.25 and prints it.
4+int main() {
5+ float32 x;
6+ x = 1.25;
7+ printf("x = %.3f\n", x);
8+ return (0);
9+}
+55,
-0
1@@ -0,0 +1,55 @@
// Point declares both int fields in one grouped field declaration.
2+type struct Point {
3+ x, y int;
4+};
5+
// Vector declares its int fields separately; the float32 z field is commented out.
6+type struct Vector {
7+ x int;
8+ y int;
9+// z float32;
10+};
11+
// S mixes three float32 fields with one int field.
12+type struct S {
13+ a float32;
14+ b float32;
15+ c float32;
16+ z int;
17+};
18+
19+// Was expecting a failure here: a positional struct literal is returned even though the struct type has more than one kind of field type
// Returns a Vector built from a positional struct literal {x, y}. The
// three-value form is kept commented out next to the commented-out z field.
20+Vector createVector(x, y int) {
21+ //return (Vector{x, y, 0});
22+ return (Vector{x, y});
23+}
24+
// Returns an S built from a named-field struct literal; every field takes the
// parameter of the same name.
25+S createS(a, b, c float32, z int) {
26+ return (S{a: a, b: b, c: c, z: z});
27+}
28+
// Builds structs from named and positional literals and through the two
// constructor functions, printing every field of each result.
29+main() {
30+ extrn printf;
31+
32+ auto p = Point{x: 10, y: 20};
33+ printf("p.x: %d\n", p.x);
34+ printf("p.y: %d\n", p.y);
35+
36+ auto p2 = Point{2, 4};
37+ printf("p2.x: %d\n", p2.x);
38+ printf("p2.y: %d\n", p2.y);
39+
40+ auto v = Vector{16, 32};
41+ printf("v.x: %d\n", v.x);
42+ printf("v.y: %d\n", v.y);
43+
44+ auto v2 = createVector(5, 10);
45+ printf("v2.x: %d\n", v2.x);
46+ printf("v2.y: %d\n", v2.y);
47+
48+ auto s = createS(0.1, 0.2, 0.3, 50);
49+ printf("s.a: %f\n", s.a);
50+ printf("s.b: %f\n", s.b);
51+ printf("s.c: %f\n", s.c);
52+ printf("s.z: %d\n", s.z);
53+
54+ return(0);
55+}
56+
+101,
-0
1@@ -0,0 +1,101 @@
2+// Comprehensive tests for typeof and literal type inference
3+extrn printf;
4+
// Prints the typeof() string for one variable of each primitive type (after
// assigning it a literal), for integer, float, string and nil literals, and
// for auto variables initialized from literals.
5+main() {
6+ printf("=== Basic Type Tests ===\n");
7+
8+ // Primitive types
9+ int i;
10+ i = 42;
11+ printf("int: %s\n", typeof(i));
12+
13+ uint ui;
14+ ui = 42;
15+ printf("uint: %s\n", typeof(ui));
16+
17+ int8 i8;
18+ i8 = 42;
19+ printf("int8: %s\n", typeof(i8));
20+
21+ uint8 ui8;
22+ ui8 = 42;
23+ printf("uint8: %s\n", typeof(ui8));
24+
25+ int16 i16;
26+ i16 = 42;
27+ printf("int16: %s\n", typeof(i16));
28+
29+ uint16 ui16;
30+ ui16 = 42;
31+ printf("uint16: %s\n", typeof(ui16));
32+
33+ int32 i32;
34+ i32 = 42;
35+ printf("int32: %s\n", typeof(i32));
36+
37+ uint32 ui32;
38+ ui32 = 42;
39+ printf("uint32: %s\n", typeof(ui32));
40+
41+ int64 i64;
42+ i64 = 42;
43+ printf("int64: %s\n", typeof(i64));
44+
45+ uint64 ui64;
46+ ui64 = 42;
47+ printf("uint64: %s\n", typeof(ui64));
48+
49+ // Float types
50+ float f;
51+ f = 3.14;
52+ printf("float: %s\n", typeof(f));
53+
54+ float32 f32;
55+ f32 = 3.14;
56+ printf("float32: %s\n", typeof(f32));
57+
58+ float64 f64;
59+ f64 = 3.14;
60+ printf("float64: %s\n", typeof(f64));
61+
62+ // Other types
63+ bool b;
64+ b = 1;
65+ printf("bool: %s\n", typeof(b));
66+
67+ byte by;
68+ by = 65;
69+ printf("byte: %s\n", typeof(by));
70+
71+ printf("\n=== Literal Types ===\n");
72+
73+ // Integer literals should default to int
74+ printf("42: %s\n", typeof(42));
75+ printf("0x42: %s\n", typeof(0x42));
76+ printf("0755: %s\n", typeof(0755));
77+
78+ // Float literals should default to float
79+ printf("3.14: %s\n", typeof(3.14));
80+ printf("0.5: %s\n", typeof(0.5));
81+ printf("1.0e-5: %s\n", typeof(1.0e-5));
82+
83+ // String literals
84+ printf("\"hello\": %s\n", typeof("hello"));
85+
86+ // Nil
87+ printf("nil: %s\n", typeof(nil));
88+
89+ printf("\n=== Auto Variables ===\n");
90+
91+ // Auto variables with literals
92+ auto auto_int = 42;
93+ printf("auto int: %s\n", typeof(auto_int));
94+
95+ auto auto_float = 3.14;
96+ printf("auto float: %s\n", typeof(auto_float));
97+
98+ auto auto_string = "hello";
99+ printf("auto string: %s\n", typeof(auto_string));
100+
101+ return(0);
102+}
+120,
-0
1@@ -0,0 +1,120 @@
2+// Tests for typeof with pointers, arrays, and structs
3+extrn printf;
4+
// Struct with two int fields, used by testStructs.
5+type struct Point {
6+ x int;
7+ y int;
8+};
9+
// Struct with two float32 fields, used by testStructs.
10+type struct FloatPoint {
11+ x float32;
12+ y float32;
13+};
14+
// Three-member enum, used by testEnums.
15+type enum Color {
16+ RED,
17+ GREEN,
18+ BLUE
19+};
20+
// Prints typeof() for an int, a pointer obtained with &, the value obtained
// by dereferencing that pointer, and a pointer to a float32 variable.
21+void testPointers() {
22+ printf("=== Pointer Types ===\n");
23+
24+ int value;
25+ value = 42;
26+ printf("int value: %s\n", typeof(value));
27+
28+ auto ptr = &value;
29+ printf("int pointer: %s\n", typeof(ptr));
30+
31+ auto deref = *ptr;
32+ printf("dereferenced: %s\n", typeof(deref));
33+
34+ // Different pointer types
35+ float32 float_val;
36+ float_val = 3.14;
37+ auto float_ptr = &float_val;
38+ printf("float32 pointer: %s\n", typeof(float_ptr));
39+}
40+
// Prints typeof() for an int array and a float32 array, and for one element
// of each array after assigning that element.
41+void testArrays() {
42+ printf("\n=== Array Types ===\n");
43+
44+ int int_array[5];
45+ printf("int array: %s\n", typeof(int_array));
46+
47+ float32 float_array[3];
48+ printf("float32 array: %s\n", typeof(float_array));
49+
50+ // Array element access
51+ int_array[0] = 10;
52+ printf("array element: %s\n", typeof(int_array[0]));
53+
54+ float_array[0] = 1.5;
55+ printf("float array element: %s\n", typeof(float_array[0]));
56+}
57+
// Prints typeof() for Point and FloatPoint variables, for individual members
// of each, and for a pointer to a Point.
58+void testStructs() {
59+ printf("\n=== Struct Types ===\n");
60+
61+ Point p;
62+ printf("Point struct: %s\n", typeof(p));
63+
64+ p.x = 10;
65+ p.y = 20;
66+ printf("struct member x: %s\n", typeof(p.x));
67+ printf("struct member y: %s\n", typeof(p.y));
68+
69+ FloatPoint fp;
70+ fp.x = 1.5;
71+ fp.y = 2.5;
72+ printf("FloatPoint struct: %s\n", typeof(fp));
73+ printf("float struct member: %s\n", typeof(fp.x));
74+
75+ // Struct pointer
76+ auto struct_ptr = &p;
77+ printf("struct pointer: %s\n", typeof(struct_ptr));
78+}
79+
// Prints typeof() for a variable declared with the Color type and for an auto
// variable initialized from an enumerator.
80+void testEnums() {
81+ printf("\n=== Enum Types ===\n");
82+
83+ Color color;
84+ color = RED;
85+ printf("enum Color: %s\n", typeof(color));
86+
87+ auto enum_val = BLUE;
88+ printf("enum value: %s\n", typeof(enum_val));
89+}
90+
// Prints typeof() applied to expressions rather than variables: integer
// arithmetic, float arithmetic, a mixed int + float sum, and comparisons.
91+void testComplexExpressions() {
92+ printf("\n=== Complex Expressions ===\n");
93+
94+ auto a = 10;
95+ auto b = 20;
96+
97+ // Arithmetic expressions
98+ printf("a + b: %s\n", typeof(a + b));
99+ printf("a * b: %s\n", typeof(a * b));
100+
101+ auto x = 1.5;
102+ auto y = 2.5;
103+ printf("x + y: %s\n", typeof(x + y));
104+
105+ // Mixed type expressions
106+ printf("a + x: %s\n", typeof(a + x));
107+
108+ // Comparison expressions
109+ printf("a > b: %s\n", typeof(a > b));
110+ printf("x < y: %s\n", typeof(x < y));
111+}
112+
// Runs the five typeof test groups in order and returns 0.
113+main() {
114+ testPointers();
115+ testArrays();
116+ testStructs();
117+ testEnums();
118+ testComplexExpressions();
119+
120+ return(0);
121+}
+77,
-0
1@@ -0,0 +1,77 @@
2+// Tests for typeof in control flow contexts (if, switch/case)
3+extrn printf;
4+
// Prints typeof() from inside nested if bodies, and from if statements whose
// condition is a bare int variable or a negated float variable.
4+void testIf() {
5+ printf("=== If Statement Context ===\n");
6+
7+ auto x = 42;
8+ auto y = 3.14;
9+
10+ if (x > 0) {
11+ printf("x is positive: %s\n", typeof(x));
12+
13+ if (y > 3.0) {
14+ printf("y is > 3.0: %s\n", typeof(y));
15+ }
16+ }
17+
18+ // Test typeof in condition expressions
19+ auto condition_int = 1;
20+ auto condition_float = 0.0;
21+
22+ if (condition_int) {
23+ printf("condition int: %s\n", typeof(condition_int));
24+ }
25+
// Logical negation applied directly to a float variable holding 0.0.
26+ if (!condition_float) {
27+ printf("condition float: %s\n", typeof(condition_float));
28+ }
29+}
31+
// Prints typeof() of the switched-on variable before the switch and inside
// the selected case arm, then repeats the check for a float variable using an
// if statement instead of a switch.
31+void testSwitch() {
32+ printf("\n=== Switch/Case Context ===\n");
33+
34+ auto value = 2;
35+ printf("switch value: %s\n", typeof(value));
36+
37+ switch (value) {
38+ case 1:
39+ printf("case 1: %s\n", typeof(value));
40+ break;
41+ case 2:
42+ printf("case 2: %s\n", typeof(value));
43+ break;
44+ default:
45+ printf("default: %s\n", typeof(value));
46+ break;
47+ }
48+
49+ // Test with different types
50+ auto float_value = 1.5;
51+ printf("float switch value: %s\n", typeof(float_value));
52+
53+ // Note: Bx may not support float in switch, but we can test the type
54+ if (float_value > 1.0) {
55+ printf("float > 1.0: %s\n", typeof(float_value));
56+ }
57+}
59+
// Prints typeof() of a counter before a while loop and on each of the three
// iterations of the loop.
59+void testLoop() {
60+ printf("\n=== Loop Context ===\n");
61+
62+ auto counter = 0;
63+ printf("loop counter: %s\n", typeof(counter));
64+
65+ while (counter < 3) {
66+ printf("in loop [%d]: %s\n", counter, typeof(counter));
67+ counter = counter + 1;
68+ }
69+}
71+
// Runs the if, switch and loop typeof tests in order and returns 0.
71+main() {
72+ testIf();
73+ testSwitch();
74+ testLoop();
75+
76+ return(0);
77+}
+53,
-0
1@@ -0,0 +1,53 @@
2+// Tests for typeof in function parameter contexts
3+extrn printf;
4+
5+// Test function with various parameter types
// Prints the typeof() string of each of its four typed parameters.
6+void testFunc(i int, f32 float32, u8 uint8, f64 float64) {
7+ printf("In testFunc:\n");
8+ printf(" param int: %s\n", typeof(i));
9+ printf(" param float32: %s\n", typeof(f32));
10+ printf(" param uint8: %s\n", typeof(u8));
11+ printf(" param float64: %s\n", typeof(f64));
12+}
13+
14+// Test function calls with literals - verify type coercion
// Calls testFunc with literal arguments, then assigns literals to float32 and
// int32 variables and prints the typeof() of each variable.
15+void testLiteralCoercion() {
16+ printf("\n=== Function Parameter Type Coercion ===\n");
17+
18+ // Call with literals - should get coerced to parameter types
19+ testFunc(42, 3.14, 255, 2.718);
20+
21+ // Test in assignment context
22+ printf("\n=== Assignment Context ===\n");
23+
24+ float32 assigned_literal;
25+ assigned_literal = 0.5; // literal float should be coerced to float32
26+ printf("assigned literal to float32: %s\n", typeof(assigned_literal));
27+
28+ int32 assigned_int;
29+ assigned_int = 100; // literal int should be coerced to int32
30+ printf("assigned literal to int32: %s\n", typeof(assigned_int));
31+}
32+
33+// Test return types
// Returns the literal 2.5 from a function declared to return float32.
34+float32 returnFloat32() {
35+ return (2.5); // literal should be coerced to float32
36+}
37+
// Returns the literal 42 from a function declared to return int16.
38+int16 returnInt16() {
39+ return (42); // literal should be coerced to int16
40+}
41+
// Runs the literal-coercion test, then stores the results of returnFloat32()
// and returnInt16() in auto variables and prints their typeof() strings.
42+main() {
43+ testLiteralCoercion();
44+
45+ printf("\n=== Return Type Context ===\n");
46+
47+ auto ret_f32 = returnFloat32();
48+ printf("returned float32: %s\n", typeof(ret_f32));
49+
50+ auto ret_i16 = returnInt16();
51+ printf("returned int16: %s\n", typeof(ret_i16));
52+
53+ return(0);
54+}
+51,
-0
1@@ -0,0 +1,51 @@
2+// Tests for typeof in function parameter contexts
3+extrn printf;
4+
5+// Test function with simple parameters
// Prints the typeof() string of its int and float parameters.
6+void testFunc(i int, f float) {
7+ printf("In testFunc:\n");
8+ printf(" param int: %s\n", typeof(i));
9+ printf(" param float: %s\n", typeof(f));
10+}
11+
12+// Calls testFunc with literal arguments, then prints typeof() for typed locals assigned from literals
13+void testLiteralCoercion() {
14+ printf("\n=== Function Parameter Type Coercion ===\n");
15+
16+ // Literal arguments - each should get coerced to the matching testFunc parameter type
17+ testFunc(42, 3.14);
18+
19+ // Assignment context: literals stored into explicitly typed locals
20+ printf("\n=== Assignment Context ===\n");
21+
22+ float assigned_literal;
23+ assigned_literal = 0.5; // literal float should be coerced to float
24+ printf("assigned literal to float: %s\n", typeof(assigned_literal));
25+
26+ int assigned_int;
27+ assigned_int = 100; // literal int should be coerced to int
28+ printf("assigned literal to int: %s\n", typeof(assigned_int));
29+}
30+
31+// Return-type test: returns a float literal from a function declared float
32+float returnFloat() {
33+ return (2.5); // literal should be coerced to float
34+}
35+
36+int returnInt() { // return-type test: integer literal returned from a function declared int
37+ return (42); // literal should be coerced to int
38+}
39+
40+main() { // runs the parameter/assignment checks, then the return-type checks
41+ testLiteralCoercion();
42+
43+ printf("\n=== Return Type Context ===\n");
44+
45+ auto ret_f = returnFloat(); // auto local initialized from the float-returning call
46+ printf("returned float: %s\n", typeof(ret_f));
47+
48+ auto ret_i = returnInt(); // auto local initialized from the int-returning call
49+ printf("returned int: %s\n", typeof(ret_i));
50+
51+ return(0);
52+}
+95,
-0
1@@ -0,0 +1,95 @@
2+// Tests for typeof with array and struct literals
3+extrn printf;
4+
5+type struct Point { // two int fields, x and y
6+ x int;
7+ y int;
8+};
9+
10+type struct FloatPoint { // two float32 fields, x and y
11+ x float32;
12+ y float32;
13+};
14+
15+void testStructLiterals() { // prints typeof() for struct-literal locals and for their members
16+ printf("=== Struct Literal Types ===\n");
17+
18+ // Struct literals: named fields (p1, fp) and positional fields (p2)
19+ auto p1 = Point{x: 10, y: 20};
20+ printf("Point literal: %s\n", typeof(p1));
21+
22+ auto p2 = Point{5, 15};
23+ printf("Point positional literal: %s\n", typeof(p2));
24+
25+ auto fp = FloatPoint{x: 1.5, y: 2.5};
26+ printf("FloatPoint literal: %s\n", typeof(fp));
27+
28+ // Struct member access (no nested structs are involved here)
29+ printf("Point member x: %s\n", typeof(p1.x));
30+ printf("FloatPoint member x: %s\n", typeof(fp.x));
31+}
32+
33+void testArrayLiterals() { // prints typeof() for typed array literals and for their first elements
34+ printf("\n=== Array Literal Types ===\n");
35+
36+ // Array literals with an explicit element type: int, float32, string
37+ auto int_arr = []int{1, 2, 3, 4, 5};
38+ printf("int array literal: %s\n", typeof(int_arr));
39+
40+ auto float_arr = []float32{1.1, 2.2, 3.3};
41+ printf("float32 array literal: %s\n", typeof(float_arr));
42+
43+ auto str_arr = []string{"hello", "world"};
44+ printf("string array literal: %s\n", typeof(str_arr));
45+
46+ // typeof() of element 0 of each array
47+ printf("int array element: %s\n", typeof(int_arr[0]));
48+ printf("float array element: %s\n", typeof(float_arr[0]));
49+ printf("string array element: %s\n", typeof(str_arr[0]));
50+}
51+
52+void testMixedLiterals() { // combines array and struct literals, and passes an array literal to testArray
53+ printf("\n=== Mixed Literal Expressions ===\n");
54+
55+ // Array of Point structs, each element written as a struct literal
56+ auto points = []Point{
57+ Point{x: 1, y: 2},
58+ Point{x: 3, y: 4}
59+ };
60+ printf("Point array: %s\n", typeof(points));
61+ printf("Point array element: %s\n", typeof(points[0]));
62+
63+ // float32 array literal passed directly as a call argument (testArray is defined later in this file)
64+ testArray([]float32{1.0, 2.0, 3.0});
65+}
66+
67+void testArray(float32 arr[]) { // array parameter declared type-first; prints typeof() of arr and of arr[0]
68+ printf("Function array param: %s\n", typeof(arr));
69+ printf("Function array element: %s\n", typeof(arr[0]));
70+}
71+
72+void testLiteralAssignment() { // prints typeof() for typed variables after they have been assigned
73+ printf("\n=== Literal Assignment Context ===\n");
74+
75+ // Assign struct literal to typed variable
76+ Point p;
77+ p = Point{x: 100, y: 200};
78+ printf("Assigned struct: %s\n", typeof(p));
79+
80+ // Typed array filled element by element (no array literal is assigned here)
81+ int numbers[3];
82+ // Note: Direct array literal assignment may not be supported
83+ numbers[0] = 1;
84+ numbers[1] = 2;
85+ numbers[2] = 3;
86+ printf("Assigned array: %s\n", typeof(numbers));
87+}
88+
89+main() { // runs the four test functions of this file in order, then returns 0
90+ testStructLiterals();
91+ testArrayLiterals();
92+ testMixedLiterals();
93+ testLiteralAssignment();
94+
95+ return(0);
96+}
+74,
-0
1@@ -0,0 +1,74 @@
2+// Tests for typeof with struct literals and simple expressions
3+extrn printf;
4+
5+type struct Point { // two int fields, x and y
6+ x int;
7+ y int;
8+};
9+
10+type struct FloatPoint { // two float32 fields, x and y
11+ x float32;
12+ y float32;
13+};
14+
15+void testStructLiterals() { // prints typeof() for struct-literal locals and for their members
16+ printf("=== Struct Literal Types ===\n");
17+
18+ // Struct literals: named fields (p1, fp) and positional fields (p2)
19+ auto p1 = Point{x: 10, y: 20};
20+ printf("Point literal: %s\n", typeof(p1));
21+
22+ auto p2 = Point{5, 15};
23+ printf("Point positional literal: %s\n", typeof(p2));
24+
25+ auto fp = FloatPoint{x: 1.5, y: 2.5};
26+ printf("FloatPoint literal: %s\n", typeof(fp));
27+
28+ // Struct member access (no nested structs are involved here)
29+ printf("Point member x: %s\n", typeof(p1.x));
30+ printf("FloatPoint member x: %s\n", typeof(fp.x));
31+}
32+
33+void testLiteralAssignment() { // prints typeof() for typed variables after they have been assigned
34+ printf("\n=== Literal Assignment Context ===\n");
35+
36+ // Assign struct literal to typed variable
37+ Point p;
38+ p = Point{x: 100, y: 200};
39+ printf("Assigned struct: %s\n", typeof(p));
40+
41+ // Typed int array filled element by element, then typeof() of the array and of element 0
42+ int numbers[3];
43+ numbers[0] = 1;
44+ numbers[1] = 2;
45+ numbers[2] = 3;
46+ printf("Assigned array: %s\n", typeof(numbers));
47+ printf("Array element: %s\n", typeof(numbers[0]));
48+}
49+
50+void testMixedExpressions() { // prints typeof() for arithmetic and comparison expressions over auto locals
51+ printf("\n=== Mixed Expressions ===\n");
52+
53+ auto int_val = 42;
54+ auto float_val = 3.14;
55+
56+ // Arithmetic where both operands are the same variable, hence the same type
57+ printf("int + int: %s\n", typeof(int_val + int_val));
58+ printf("float + float: %s\n", typeof(float_val + float_val));
59+
60+ // Mixed int/float arithmetic in both operand orders (should promote to float)
61+ printf("int + float: %s\n", typeof(int_val + float_val));
62+ printf("float * int: %s\n", typeof(float_val * int_val));
63+
64+ // typeof() of comparison results: same-type (==) and mixed-type (>)
65+ printf("int == int: %s\n", typeof(int_val == int_val));
66+ printf("float > int: %s\n", typeof(float_val > int_val));
67+}
68+
69+main() { // runs the three test functions of this file in order, then returns 0
70+ testStructLiterals();
71+ testLiteralAssignment();
72+ testMixedExpressions();
73+
74+ return(0);
75+}
+1,
-1
1@@ -1,4 +1,4 @@
2 main() {
3- auto a = 42;
4+ a := 42;
5 a(); // OK, OK, calling it... (a was initialized with the integer 42 on the line above)
6 }