Skip to content

Commit f74cd5e

Browse files
committed
WIP: Generate an IR.
The compiler has come a long way, but we still can't do int64, because it requires a non-cell storage size. There's no (sane) way to express conversions between int32 and int64 within the AST, because we have no uniform way of inserting conversion nodes. This is already a deep problem that has been hacked around for operator overloads and property accessors, and it doesn't scale.

The solution is obvious: transform the AST into an IR. That's what we should have done from the beginning but didn't. Unfortunately it requires a *lot* of refactoring and a ton of boilerplate. So far, I have most of the boilerplate done, but the refactoring is only halfway there. CodeGenerator has not been ported to the IR yet.

Once this gigantic patch is done, we'll have the following changes:
- `struct value` will be eliminated, and good riddance.
- The AST will be immutable after parsing.
- The semantic analysis phase will output a new IR tree.
- CodeGenerator will generate off the IR instead. Since the IR is a transformation of the AST, I'm expecting minimal changes to the end result.
- functag_t will be replaced by FunctionType.

V2: CG-IR can now assemble trivial programs.
V3: CG-IR supports basic calls; 341 test failures.
V4: CG-IR supports binary ops; 333 test failures.
V5: CG-IR supports do-while and if; 329 test failures.
1 parent fd82555 commit f74cd5e

34 files changed

+2679
-1961
lines changed

compiler/AMBuilder

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ module.sources += [
1212
'data-queue.cpp',
1313
'errors.cpp',
1414
'expressions.cpp',
15+
'ir.cpp',
1516
'lexer.cpp',
1617
'main.cpp',
1718
'name-resolution.cpp',

compiler/array-helpers.cpp

+14-14
Original file line numberDiff line numberDiff line change
@@ -822,7 +822,7 @@ bool Semantics::CheckArrayDeclaration(VarDeclBase* decl) {
822822
class CompoundEmitter final
823823
{
824824
public:
825-
CompoundEmitter(Type* type, Expr* init)
825+
CompoundEmitter(QualType type, Expr* init)
826826
: type_(type),
827827
init_(init),
828828
pending_zeroes_(0)
@@ -852,11 +852,11 @@ class CompoundEmitter final
852852
size_t AddString(StringExpr* expr);
853853
void AddInlineArray(LayoutFieldDecl* field, ArrayExpr* expr);
854854
void AddInlineEnumStruct(EnumStructDecl* es, ArrayExpr* expr);
855-
void EmitPadding(size_t rank_size, Type* type, size_t emitted, bool ellipses,
855+
void EmitPadding(size_t rank_size, QualType type, size_t emitted, bool ellipses,
856856
const ke::Maybe<cell> prev1, const ke::Maybe<cell> prev2);
857857

858858
private:
859-
Type* type_;
859+
QualType type_;
860860
Expr* init_;
861861
tr::vector<cell> iv_;
862862
tr::vector<cell> data_;
@@ -948,7 +948,7 @@ cell CompoundEmitter::Emit(ArrayType* rank, Expr* init) {
948948
// This only works because enum structs are flattened and don't support
949949
// internal IVs. No plans to change this as it would greatly increase
950950
// complexity unless we radically changed arrays.
951-
EmitPadding(rank->size(), rank->inner(), emitted, ellipses, prev1, prev2);
951+
EmitPadding(rank->size(), QualType(rank->inner()), emitted, ellipses, prev1, prev2);
952952

953953
return (start * sizeof(cell)) | kDataFlag;
954954
}
@@ -967,7 +967,7 @@ void CompoundEmitter::AddInlineEnumStruct(EnumStructDecl* es, ArrayExpr* array)
967967
assert(field);
968968

969969
auto rank_type = field->type()->to<ArrayType>();
970-
EmitPadding(rank_type->size(), rank_type->inner(), emitted, false, {}, {});
970+
EmitPadding(rank_type->size(), QualType(rank_type->inner()), emitted, false, {}, {});
971971
} else if (ArrayExpr* expr = item->as<ArrayExpr>()) {
972972
// Subarrays can only appear in an enum struct. Normal 2D cases
973973
// would flow through the check at the start of this function.
@@ -994,12 +994,12 @@ void CompoundEmitter::AddInlineArray(LayoutFieldDecl* field, ArrayExpr* array) {
994994
}
995995

996996
auto rank_size = field->type()->to<ArrayType>()->size();
997-
EmitPadding(rank_size, field->type(), array->exprs().size(),
997+
EmitPadding(rank_size, QualType(field->type()), array->exprs().size(),
998998
array->ellipses(), prev1, prev2);
999999
}
10001000

10011001
void
1002-
CompoundEmitter::EmitPadding(size_t rank_size, Type* type, size_t emitted, bool ellipses,
1002+
CompoundEmitter::EmitPadding(size_t rank_size, QualType type, size_t emitted, bool ellipses,
10031003
const ke::Maybe<cell> prev1, const ke::Maybe<cell> prev2)
10041004
{
10051005
// Pad remainder to zeroes if the array was explicitly sized.
@@ -1053,20 +1053,20 @@ CompoundEmitter::add_data(cell value)
10531053
data_.emplace_back(value);
10541054
}
10551055

1056-
void BuildCompoundInitializer(Type* type, Expr* init, ArrayData* array) {
1056+
void BuildCompoundInitializer(QualType type, Expr* init, ArrayData* array,
1057+
std::optional<cell_t> base_address)
1058+
{
10571059
CompoundEmitter emitter(type, init);
10581060
emitter.Emit();
10591061

10601062
array->iv = std::move(emitter.iv());
10611063
array->data = std::move(emitter.data());
10621064
array->zeroes = emitter.pending_zeroes();
1063-
}
10641065

1065-
void BuildCompoundInitializer(VarDeclBase* decl, ArrayData* array, cell base_address) {
1066-
BuildCompoundInitializer(decl->type(), decl->init_rhs(), array);
1067-
1068-
for (auto& v : array->iv)
1069-
v += base_address;
1066+
if (base_address) {
1067+
for (auto& v : array->iv)
1068+
v += *base_address;
1069+
}
10701070
}
10711071

10721072
} // namespace cc

compiler/array-helpers.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ bool ResolveArrayType(Semantics* sema, const token_pos_t& pos, typeinfo_t* type,
3838
// Perform type and size checks of an array and its initializer if present.
3939
bool CheckArrayInitialization(Semantics* sema, const typeinfo_t& type, Expr* init);
4040

41-
void BuildCompoundInitializer(VarDeclBase* decl, ArrayData* array, cell_t base_addr);
42-
void BuildCompoundInitializer(Type* type, Expr* init, ArrayData* array);
41+
void BuildCompoundInitializer(QualType type, Expr* init, ArrayData* array,
42+
std::optional<cell_t> base_address = {});
4343

4444
cell_t CalcArraySize(Type* type);
4545

compiler/assembler.cpp

+43-59
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ struct function_entry {
6565
function_entry(const function_entry& other) = delete;
6666
function_entry& operator =(const function_entry& other) = delete;
6767

68-
FunctionDecl* decl = nullptr;
68+
ir::Function* fun;
6969
std::string name;
7070
};
7171

@@ -123,7 +123,7 @@ class RttiBuilder
123123
RttiBuilder(CompileContext& cc, CodeGenerator& cg, SmxNameTable* names);
124124

125125
void finish(SmxBuilder& builder);
126-
void add_method(FunctionDecl* fun);
126+
void add_method(ir::Function* fun);
127127
void add_native(FunctionDecl* sym);
128128

129129
private:
@@ -360,17 +360,19 @@ RttiBuilder::add_debug_var(SmxRttiTable<smx_rtti_debug_var>* table, DebugString&
360360
var.type_id = type_id;
361361
}
362362

363-
void RttiBuilder::add_method(FunctionDecl* fun) {
363+
void RttiBuilder::add_method(ir::Function* fun) {
364364
assert(fun->is_live());
365365

366366
uint32_t index = methods_->count();
367367
smx_rtti_method& method = methods_->add();
368-
method.name = names_->add(fun->name());
369-
method.pcode_start = fun->cg()->label.offset();
370-
method.pcode_end = fun->cg()->pcode_end;
371-
method.signature = encode_signature(fun->canonical());
372-
373-
if (!fun->cg()->dbgstrs)
368+
method.name = names_->add(fun->decl()->name());
369+
method.pcode_start = fun->label().offset();
370+
method.pcode_end = fun->pcode_end();
371+
method.signature = encode_signature(fun->decl()->canonical());
372+
373+
(void)index;
374+
#if 0
375+
if (!fun->dbgstrs)
374376
return;
375377

376378
smx_rtti_debug_method debug;
@@ -392,6 +394,7 @@ void RttiBuilder::add_method(FunctionDecl* fun) {
392394
// Only add a method table entry if we actually had locals.
393395
if (debug.first_local != dbg_locals_->count())
394396
dbg_methods_->add(debug);
397+
#endif
395398
}
396399

397400
void RttiBuilder::add_native(FunctionDecl* fun) {
@@ -721,59 +724,44 @@ Assembler::Assemble(SmxByteBuffer* buffer)
721724
std::vector<function_entry> functions;
722725
std::unordered_set<Decl*> symbols;
723726

724-
// Sort globals.
725-
std::vector<Decl*> global_symbols;
726-
cc_.globals()->ForEachSymbol([&](Decl* decl) -> void {
727-
global_symbols.push_back(decl);
727+
auto mod = cg_.mod();
728728

729-
// This is only to assert that we embedded pointers properly in the assembly buffer.
730-
symbols.emplace(decl);
729+
// Sort globals.
730+
std::sort(mod->functions().begin(), mod->functions().end(),
731+
[](const ir::Function* a, const ir::Function* b) {
732+
return a->decl()->name()->str() < b->decl()->name()->str();
731733
});
732-
for (const auto& decl : cc_.functions()) {
733-
if (symbols.count(decl))
734-
continue;
735-
if (decl->canonical() != decl)
734+
735+
for (const auto& fun : mod->functions()) {
736+
auto decl = fun->decl();
737+
738+
if (decl->is_native() || !fun->body())
736739
continue;
737-
global_symbols.push_back(decl);
738-
symbols.emplace(decl);
739-
}
740740

741-
std::sort(global_symbols.begin(), global_symbols.end(),
742-
[](const Decl* a, const Decl *b) -> bool {
743-
return a->name()->str() < b->name()->str();
744-
});
741+
function_entry entry;
742+
entry.fun = fun;
743+
if (decl->is_public()) {
744+
entry.name = decl->name()->str();
745+
} else {
746+
// Create a private name.
747+
entry.name = ke::StringPrintf(".%d.%s", fun->label().offset(), decl->name()->chars());
748+
}
749+
750+
functions.emplace_back(std::move(entry));
751+
}
745752

753+
#if 0
746754
// Build the easy symbol tables.
747755
for (const auto& decl : global_symbols) {
748-
if (auto fun = decl->as<FunctionDecl>()) {
749-
if (fun->is_native())
750-
continue;
751-
752-
if (!fun->body())
753-
continue;
754-
if (!fun->is_live())
755-
continue;
756-
if (fun->canonical() != fun)
757-
continue;
758-
759-
function_entry entry;
760-
entry.decl = fun;
761-
if (fun->is_public()) {
762-
entry.name = fun->name()->str();
763-
} else {
764-
// Create a private name.
765-
entry.name = ke::StringPrintf(".%d.%s", fun->cg()->label.offset(), fun->name()->chars());
766-
}
767-
768-
functions.emplace_back(std::move(entry));
769-
} else if (auto var = decl->as<VarDecl>()) {
756+
if (auto var = decl->as<VarDecl>()) {
770757
if (var->is_public() || (var->is_used() && !var->as<ConstDecl>())) {
771758
sp_file_pubvars_t& pubvar = pubvars->add();
772759
pubvar.address = var->addr();
773760
pubvar.name = names->add(var->name());
774761
}
775762
}
776763
}
764+
#endif
777765

778766
// The public list must be sorted.
779767
std::sort(functions.begin(), functions.end(),
@@ -783,31 +771,27 @@ Assembler::Assemble(SmxByteBuffer* buffer)
783771
for (size_t i = 0; i < functions.size(); i++) {
784772
function_entry& f = functions[i];
785773

786-
assert(f.decl->cg()->label.offset() > 0);
787-
assert(f.decl->impl());
788-
assert(f.decl->cg()->pcode_end > f.decl->cg()->label.offset());
789-
790774
sp_file_publics_t& pubfunc = publics->add();
791-
pubfunc.address = f.decl->cg()->label.offset();
775+
pubfunc.address = f.fun->label().offset();
792776
pubfunc.name = names->add(*cc_.atoms(), f.name.c_str());
793777

794778
auto id = (uint32_t(i) << 1) | 1;
795779
if (!Label::ValueFits(id))
796780
report(421);
797-
cg_.LinkPublicFunction(f.decl, id);
781+
cg_.LinkPublicFunction(f.fun, id);
798782

799-
rtti.add_method(f.decl);
783+
rtti.add_method(f.fun);
800784
}
801785

802786
// Populate the native table.
803787
for (size_t i = 0; i < cg_.native_list().size(); i++) {
804-
FunctionDecl* sym = cg_.native_list()[i];
805-
assert(size_t(sym->cg()->label.offset()) == i);
788+
ir::Function* sym = cg_.native_list()[i];
789+
assert(size_t(sym->label().offset()) == i);
806790

807791
sp_file_natives_t& entry = natives->add();
808-
entry.name = names->add(sym->name());
792+
entry.name = names->add(sym->decl()->name());
809793

810-
rtti.add_native(sym);
794+
rtti.add_native(sym->decl());
811795
}
812796

813797
// Set up the code section.

compiler/assembler.h

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "libsmx/data-pool.h"
2727
#include "libsmx/smx-builder.h"
2828
#include "libsmx/smx-encoding.h"
29+
#include "ir.h"
2930
#include "sc.h"
3031
#include "shared/byte-buffer.h"
3132
#include "shared/string-pool.h"

compiler/ast-types.h

+54
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
// 3. This notice may not be removed or altered from any source distribution.
2020
#pragma once
2121

22+
#include <stdint.h>
23+
2224
#define AST_STMT_TYPE_LIST(FOR_EACH) \
2325
FOR_EACH(StmtList) \
2426
FOR_EACH(BlockStmt) \
@@ -79,6 +81,48 @@
7981
FOR_EACH(StructExpr) \
8082
FOR_EACH(StructInitFieldExpr)
8183

84+
#define IR_NODE_TYPE_LIST(FOR_EACH) \
85+
/* Decls */ \
86+
FOR_EACH(Function) \
87+
FOR_EACH(Variable) \
88+
/* Statements */ \
89+
FOR_EACH(Return) \
90+
FOR_EACH(ValueInsn) \
91+
FOR_EACH(Exit) \
92+
FOR_EACH(Break) \
93+
FOR_EACH(Continue) \
94+
FOR_EACH(Assert) \
95+
FOR_EACH(If) \
96+
FOR_EACH(DoWhile) \
97+
FOR_EACH(Delete) \
98+
FOR_EACH(ForLoop) \
99+
FOR_EACH(Switch) \
100+
FOR_EACH(FunctionDef) \
101+
/* Values */ \
102+
FOR_EACH(ConstVal) \
103+
FOR_EACH(CharArrayLiteral) \
104+
FOR_EACH(VariableRef) \
105+
FOR_EACH(TypeRef) \
106+
FOR_EACH(FunctionRef) \
107+
FOR_EACH(IndexOp) \
108+
FOR_EACH(Load) \
109+
FOR_EACH(TernaryOp) \
110+
FOR_EACH(BinaryOp) \
111+
FOR_EACH(Array) \
112+
FOR_EACH(CommaOp) \
113+
FOR_EACH(CallOp) \
114+
FOR_EACH(TempRef) \
115+
FOR_EACH(PropertyRef) \
116+
FOR_EACH(FieldRef) \
117+
FOR_EACH(UnaryOp) \
118+
FOR_EACH(CallUserOp) \
119+
FOR_EACH(IncDecOp) \
120+
FOR_EACH(Store) \
121+
FOR_EACH(ThisRef)
122+
123+
namespace sp {
124+
namespace cc {
125+
82126
enum class ExprKind : uint8_t
83127
{
84128
#define _(Name) Name,
@@ -92,3 +136,13 @@ enum class StmtKind : uint8_t
92136
AST_STMT_TYPE_LIST(_)
93137
#undef _
94138
};
139+
140+
enum class IrKind : uint8_t
141+
{
142+
#define _(Name) Name,
143+
IR_NODE_TYPE_LIST(_)
144+
#undef _
145+
};
146+
147+
} // namespace cc
148+
} // namespace sp

0 commit comments

Comments
 (0)