# token_value --
#   Accessor for the `value` entry of a token dictionary.
#   Raises the usual dict error when the key is absent.
proc token_value {token} {
    set field [dict get $token value]
    return $field
}
# token_type --
#   Accessor for the `type` entry of a token dictionary.
#   Raises the usual dict error when the key is absent.
proc token_type {token} {
    set kind [dict get $token type]
    return $kind
}
# token_line --
#   Accessor for the `line` entry of a token dictionary.
#   Raises the usual dict error when the key is absent.
proc token_line {token} {
    set line_no [dict get $token line]
    return $line_no
}
# token_col --
#   Return the column of a token dictionary.
#
#   The tokenizer in this file stores the position under the key `column`,
#   so that key is read first. The legacy key `col` is still honoured as a
#   fallback so hand-built token dicts that used it keep working.
#
# Arguments:
#   token   token dictionary
#
# Results:
#   The stored column. Raises the usual dict error when neither `column`
#   nor `col` is present.
proc token_col {token} {
    if {[dict exists $token column]} {
        return [dict get $token column]
    }
    return [dict get $token col]
}
# tokenize --
#   Scan `code` from a moving cursor and append one token dict per match to
#   the local list `tokens`. Each token dict has the keys type, value, line
#   and column.
#
# Arguments:
#   code            text to scan
#   token_patterns  list of two-element items: token type, then its pattern
#   keywords        words loaded into a lookup dict and tested against
#                   IDENTIFIER values
#   case_sensitive  when false, token values are lower-cased (default 1)
7 proc tokenize {code token_patterns keywords {case_sensitive 1}} {
# Build a dict keyed by keyword so membership can be tested with dict exists.
13 set keyword_dict [dict create]
14 foreach kw $keywords {
15 dict set keyword_dict $kw 1
# Number of characters that have to be consumed.
18 set code_len [
string length $code]
# Outer loop: run until the cursor reaches the end of the text.
19 while {$cursor < $code_len} {
# Inner loop: keep trying at the same cursor until some pattern matches.
26 while {!$found_match && $cursor < $code_len} {
# The window covers window_size characters, clamped to the last valid index.
27 set window_end [
expr {min($cursor + $window_size - 1, $code_len - 1)}]
28 set window [
string range $code $cursor $window_end]
# Patterns are tried in list order; element 0 is the type, element 1 the pattern.
30 foreach item $token_patterns {
31 set type [
lindex $item 0]
32 set pattern [
lindex $item 1]
# allmatch receives the whole match, submatch the first capture group.
36 if {[regexp -- $re $window allmatch submatch]} {
38 set match_len [
string length $allmatch]
# Column is 1-based, measured from the start of the current line.
40 set column [
expr {$cursor - $line_start + 1}]
# After a NEWLINE token the next line starts right behind the match.
42 if {$type == "NEWLINE"} {
44 set line_start [
expr {$cursor + $match_len}]
45 }
elseif {$type != "WHITESPACE" && $type != "COMMENT"} {
# Only reached for types other than NEWLINE, WHITESPACE and COMMENT.
47 if {!$case_sensitive} {
48 set value [
string tolower $value]
# IDENTIFIER values present in the keyword dict get special handling.
# NOTE(review): presumably re-typed as KEYWORD, since later passes test for that type; confirm.
51 if {$type == "IDENTIFIER" && [dict exists $keyword_dict $value]} {
# Record the token; note that the position is stored under the key column.
54 lappend tokens [dict create type $type value $value line $line column $column]
# Move the cursor past the matched text.
57 set cursor [
expr {$cursor + $match_len}]
# Nothing matched although the window already reaches the end of the text.
63 if {!$found_match && $window_end >= $code_len - 1} {
# Double the window size and report the new size.
68 set window_size [
expr {$window_size * 2}]
69 Msg Warning "No match found at index $cursor, increasing window size to $window_size"
# Report the character the tokenizer could not get past on stderr.
74 puts stderr "Error: Tokenizer stuck at index $cursor on char '[
string index $code $cursor]'"
# create_hdl_node --
#   Build the dict that represents one HDL design unit.
#
# Arguments:
#   type                 node kind, stored under key type
#   name                 unit name
#   file_path            file the unit was found in
#   line                 line number associated with the unit
#   libraries            optional, defaults to the empty string
#   components_declared  optional, defaults to the empty string
#   instantiations       optional, defaults to the empty string
#   entity               optional, defaults to the empty string
#
# Results:
#   A dict whose keys carry the same names as the parameters.
110 proc create_hdl_node {type name file_path line {libraries ""} {components_declared ""} {instantiations ""} {entity ""}} {
111 return [dict create type $type name $name file_path $file_path line $line \
112 libraries $libraries components_declared $components_declared instantiations $instantiations entity $entity]
# create_instantiation_node --
#   Build the dict that represents one instantiation.
#
# Results:
#   A dict with the keys mod_name, type, inst_name and line, each holding the
#   parameter of the same name.
115 proc create_instantiation_node {mod_name type inst_name line} {
116 return [dict create mod_name $mod_name type $type inst_name $inst_name line $line]
# hdl_node_string --
#   Assemble a multi-line, human-readable description of an HDL node dict in
#   the local string node_info. Which sections are added depends on the
#   node type.
#
# Arguments:
#   hdl_node   node dict with at least the keys type, name, file_path and line
119 proc hdl_node_string {hdl_node} {
# Fields printed for every node type.
121 set node_info "Node Type: [dict get $hdl_node type]"
122 append node_info "\n Name: [dict get $hdl_node name]"
123 append node_info "\n File Path: [dict get $hdl_node file_path]"
124 append node_info "\n Declared on line: [dict get $hdl_node line]"
# Entity is printed only when the key exists and is not empty.
125 if {[dict exists $hdl_node entity] && [dict get $hdl_node entity] ne ""} {
126 append node_info "\n Entity: [dict get $hdl_node entity]"
# The four VHDL node types list their libraries.
129 if {[dict get $hdl_node type] eq "vhdl_entity" || [dict get $hdl_node type] eq "vhdl_package" \
130 || [dict get $hdl_node type] eq "vhdl_architecture" || [dict get $hdl_node type] eq "vhdl_package_body"} {
131 append node_info "\n Libraries:"
132 set libraries [dict get $hdl_node libraries]
# An empty list is rendered as (None).
133 if {[
llength $libraries] == 0} {
134 append node_info " (None)"
# Each library entry is a dict with the keys name and uses.
136 foreach lib $libraries {
137 append node_info "\n - Library: [dict get $lib name]"
138 set uses [dict get $lib uses]
139 if {[
llength $uses] > 0} {
140 append node_info "\n Uses:"
142 append node_info "\n - $use"
# Architectures additionally list declared components and instantiations.
148 if {[dict get $hdl_node type] eq "vhdl_architecture"} {
149 append node_info "\n Components Declared:"
150 set components_declared [dict get $hdl_node components_declared]
151 if {[
llength $components_declared] == 0} {
152 append node_info " (None)"
# Each component entry is a dict with the keys name and line.
154 foreach comp $components_declared {
155 append node_info "\n - [dict get $comp name] (line [dict get $comp line])"
158 append node_info "\n Instantiations:"
159 set instantiations [dict get $hdl_node instantiations]
160 if {[
llength $instantiations] == 0} {
161 append node_info " (None)"
# Each instantiation entry is a dict with the keys mod_name, inst_name, type and line.
163 foreach inst $instantiations {
165 append node_info "\n - Name: [dict get $inst mod_name], Instance: [dict get $inst inst_name] type: [dict get $inst type] (line [dict get $inst line])"
169 }
elseif {[dict get $hdl_node type] eq "verilog_module"} {
# Verilog modules list instantiations only, in the same format as above.
170 append node_info "\n Instantiations:"
171 set instantiations [dict get $hdl_node instantiations]
172 if {[
llength $instantiations] == 0} {
173 append node_info " (None)"
175 foreach inst $instantiations {
177 append node_info "\n - Name: [dict get $inst mod_name], Instance: [dict get $inst inst_name] type: [dict get $inst type] (line [dict get $inst line])"
# verilog_keywords --
#   Reserved words of Verilog. tokenize_verilog hands this list to tokenize,
#   which tests IDENTIFIER values against it.
#   The entry after trireg is `unsigned`; it was previously misspelled as
#   `unsigned1`, which is not a Verilog keyword.
set verilog_keywords {
    always and assign automatic
    begin buf bufif0 bufif1 case casex
    casez cell cmos config
    deassign default defparam design disable
    edge else end endcase endconfig endfunction endgenerate
    endmodule endprimitive endspecify endtable endtask event
    for force forever fork function generate genvar highz0
    highz1 if ifnone incdir include initial inout input instance integer
    join large liblist library localparam macromodule medium module nand negedge
    nmos nor noshowcancelled not notif0 notif1 or output parameter pmos
    posedge primitive pull0 pull1 pulldown pullup pulsestyle_onevent pulsestyle_ondetect rcmos real
    realtime reg release repeat rnmos rpmos rtran rtranif0 rtranif1 scalared
    showcancelled signed small specify specparam strong0 strong1 supply0 supply1 table
    task time tran tranif0 tranif1 tri tri0 tri1 triand trior
    trireg unsigned use uwire vectored wait wand weak0 weak1 while
# verilog_token_patterns --
#   Token table for Verilog: a list of two-element items, token type first,
#   regular expression second. tokenize_verilog passes it to tokenize, which
#   walks the items in list order.
#   COMMENT covers both line comments and block comments; WHITESPACE excludes
#   the newline character; DIRECTIVE is a backtick followed by an identifier.
211 set verilog_token_patterns {
212 {COMMENT {//[^\n]*|/\*.*?\*/}}
214 {WHITESPACE {[ \t\r]+}}
216 {NUMBER {\d['\d_hHbsodxza-fA-F]*}}
217 {IDENTIFIER {[a-zA-Z_][a-zA-Z0-9_$]*}}
218 {OPERATOR {[+\-*/%<>=!&|~^?@:]+}}
229 {DIRECTIVE {\`[a-zA-Z_][a-zA-Z0-9_]*}}
# tokenize_verilog --
#   Tokenize Verilog text by calling tokenize with the global Verilog token
#   table and keyword list. The last argument 1 selects case-sensitive mode,
#   so token values are not lower-cased.
#
# Results:
#   Whatever tokenize returns for the given code.
233 proc tokenize_verilog {code} {
234 global verilog_token_patterns verilog_keywords
235 return [
tokenize $code $verilog_token_patterns $verilog_keywords 1]
# find_verilog_constructs --
#   Walk a Verilog token list with a three-state machine (TOP_LEVEL,
#   IN_MODULE_HEADER, IN_MODULE_BODY). For every module closed by endmodule a
#   verilog_module node is appended to the local list results, carrying the
#   instantiations collected while inside the module body.
#
# Arguments:
#   tokens     token list to scan
#   filename   stored as the file_path of every node created
238 proc find_verilog_constructs {tokens filename} {
240 set state "TOP_LEVEL"
241 set current_module ""
242 set current_module_insts [list]
# Visit the tokens in order; i may also be advanced inside the loop body.
244 for {
set i 0} {$i < [
llength $tokens]} {
incr i} {
245 set token [
lindex $tokens $i]
249 if {$state == "TOP_LEVEL"} {
# The keyword module followed by an IDENTIFIER opens a module header.
250 if {$type == "KEYWORD" && $value == "module"} {
251 if {$i + 1 < [
llength $tokens]} {
252 set next_token [
lindex $tokens [
expr {$i + 1}]]
253 if {[
token_type $next_token] == "IDENTIFIER"} {
# Start collecting instantiations for the new module from scratch.
255 set state "IN_MODULE_HEADER"
256 set current_module_insts [list]
260 }
elseif {$state == "IN_MODULE_HEADER"} {
# The header ends at the first SEMICOLON token.
261 if {$type == "SEMICOLON"} {
262 set state "IN_MODULE_BODY"
264 }
elseif {$state == "IN_MODULE_BODY"} {
# endmodule closes the module: emit its node and return to TOP_LEVEL.
265 if {$type == "KEYWORD" && $value == "endmodule"} {
# The node line is taken from the endmodule token, not from the module keyword.
267 set decl_node [
create_hdl_node "verilog_module" $current_module $filename [dict get $token line] "" "" $current_module_insts]
268 lappend results $decl_node
270 set state "TOP_LEVEL"
271 set current_module ""
# An IDENTIFIER with at least two tokens behind it is examined as a
# candidate instantiation using the two following tokens.
275 if {$type == "IDENTIFIER"} {
276 if {$i + 2 < [
llength $tokens]} {
277 set token2 [
lindex $tokens [
expr {$i + 1}]]
278 set token3 [
lindex $tokens [
expr {$i + 2}]]
282 lappend current_module_insts $inst_dict
# Forward scan that shares the index i with the enclosing for loop.
288 while {$i < [
llength $tokens]} {
289 set token [
lindex $tokens $i]
304 set token [
lindex $tokens $i]
307 lappend current_module_insts $inst_dict
321 abs access after alias all and architecture array assert attribute
322 begin block body buffer bus case component configuration constant
323 disconnect downto else elsif end entity exit file for function
324 generate generic group guarded if impure in inertial inout is
325 label library linkage literal loop map mod nand new next nor not
326 null of on open or others out package port postponed procedure
327 process pure range record register reject rem report return rol
328 ror select severity signal shared sla sll sra srl subtype then
329 to transport type unaffected units until use variable wait when
# vhdl_token_patterns --
#   Token table for VHDL: a list of two-element items, token type first,
#   regular expression second. tokenize_vhdl passes it to tokenize, which
#   walks the items in list order.
#   CHAR_LITERAL is exactly one character between single quotes; IDENTIFIER
#   must start with a letter; OPERATOR matches a single character.
333 set vhdl_token_patterns {
336 {WHITESPACE {[ \t\r]+}}
338 {CHAR_LITERAL {'[^']'}}
339 {NUMBER {\d['\d_.]*}}
340 {IDENTIFIER {[a-zA-Z][a-zA-Z0-9_]*}}
341 {OPERATOR {[:=<>|/*&.+-]}}
# tokenize_vhdl --
#   Tokenize VHDL text by calling tokenize with the global VHDL token table
#   and keyword list. The last argument 0 turns case sensitivity off, which
#   makes tokenize lower-case the token values.
#
# Results:
#   Whatever tokenize returns for the given code.
349 proc tokenize_vhdl {code} {
350 global vhdl_token_patterns vhdl_keywords
351 return [
tokenize $code $vhdl_token_patterns $vhdl_keywords 0]
# parse_vhdl_architecture_header --
#   Scan tokens starting at index and collect component declarations.
#
# Arguments:
#   tokens   token list
#   index    position at which scanning starts
#
# Results:
#   A dict with the keys index (value of i when scanning stopped) and
#   components (list of dicts with the keys name and line).
355 proc parse_vhdl_architecture_header {tokens index} {
356 set architecture_components [list]
358 for {
set i $index} {$i < [
llength $tokens]} {
incr i} {
359 set token [
lindex $tokens $i]
# The keyword begin gets its own branch.
# NOTE(review): presumably this ends the header scan; confirm.
363 if {$type == "KEYWORD" && $value == "begin"} {
# A component declaration needs two more tokens after the keyword:
# the component name and the token that follows it.
368 if {$type == "KEYWORD" && $value == "component"} {
369 if {[
expr {$i + 2}] < [
llength $tokens]} {
370 set comp_name_tok [
lindex $tokens [
expr {$i + 1}]]
371 set is_tok [
lindex $tokens [
expr {$i + 2}]]
373 if {[
token_type $comp_name_tok] == "IDENTIFIER" &&
375 lappend architecture_components [dict create name [
token_value $comp_name_tok] line [
token_line $comp_name_tok]]
# Hand back both the stop position and the collected components.
380 return [dict create index $i components $architecture_components]
# parse_vhdl_architecture_body --
#   Scan an architecture body starting at index and collect instantiations in
#   the local list architecture_insts.
#
# Arguments:
#   tokens      token list
#   index       position at which scanning starts
#   arch_name   name of the architecture being parsed
#
# Results:
#   A dict with the keys index (value of i when scanning stopped) and insts.
383 proc parse_vhdl_architecture_body {tokens index arch_name} {
384 set architecture_insts [list]
386 for {
set i $index} {$i < [
llength $tokens]} {
incr i} {
387 set token [
lindex $tokens $i]
# On the keyword end the two following tokens are fetched for inspection.
# NOTE(review): presumably used to recognise the end of the architecture; confirm.
391 if {$type == "KEYWORD" && $value == "end"} {
392 if {$i + 2 < [
llength $tokens]} {
393 set token2 [
lindex $tokens [
expr {$i + 1}]]
394 set token3 [
lindex $tokens [
expr {$i + 2}]]
# On an IDENTIFIER the two following tokens are fetched as well.
402 if {$type == "IDENTIFIER"} {
403 if {$i + 2 < [
llength $tokens]} {
405 set token2 [
lindex $tokens [
expr {$i + 1}]]
406 set token3 [
lindex $tokens [
expr {$i + 2}]]
# Token values are concatenated into entity_inst_name; the values generic
# and port and the SEMICOLON type are singled out by the test below.
410 set entity_inst_name ""
411 while {$i < [
llength $tokens]} {
412 set token [
lindex $tokens $i]
416 if {$value == "generic" || $value == "port" || $type == "SEMICOLON"} {
419 set entity_inst_name "${entity_inst_name}${value}"
422 if {$entity_inst_name != ""} {
427 set token4 [
lindex $tokens [
expr {$i + 3}]]
# Look ahead from i + 3 with a separate index k, so i itself is not moved.
433 set is_instantiation 0
435 for {
set k [
expr {$i + 3}]} {$k < [
llength $tokens]} {
incr k} {
436 set check_token [
lindex $tokens $k]
440 if {$check_type == "SEMICOLON"} {
# The keywords port and generic, together with the token after them,
# decide whether the statement counts as an instantiation.
443 if {$check_type == "KEYWORD" && ($check_value eq "port" || $check_value eq "generic")} {
444 if {[
expr {$k + 1}] < [
llength $tokens]} {
445 set next_token [
lindex $tokens [
expr {$k + 1}]]
447 set is_instantiation 1
454 if {$is_instantiation} {
# Hand back both the stop position and the collected instantiations.
462 return [dict create index $i insts $architecture_insts]
# parse_vhdl_architecture_content --
#   Combine the results held in header_info and body_info for one
#   architecture.
#
# Results:
#   A dict with the keys index, components and insts.
465 proc parse_vhdl_architecture_content {arch tokens index} {
# Continue from where the header information says scanning stopped.
469 set i [dict get $header_info index]
470 set architecture_components [dict get $header_info components]
# The body results are taken over only when the token at i has the value begin.
472 if {$i < [
llength $tokens] && [
token_value [
lindex $tokens $i]] eq "begin"} {
476 set i [dict get $body_info index]
477 set architecture_insts [dict get $body_info insts]
479 return [dict create index $i components $architecture_components insts $architecture_insts]
# skip_vhdl_package_spec --
#   Step through tokens looking for the value end and inspect the token that
#   follows it.
#
# Arguments:
#   tokens   token list
#   index    position at which scanning starts
482 proc skip_vhdl_package_spec {tokens index} {
484 while {$i < [
llength $tokens]} {
485 set current_tok [
lindex $tokens $i]
487 if {$current_val eq "end"} {
# expr joins its arguments before evaluating, so this tests i + 1 < llength.
488 if {[
expr {$i + 1} < [
llength $tokens]]} {
489 set next_token [
lindex $tokens [
expr {$i + 1}]]
# Step onto the token that follows end.
491 set i [
expr {$i + 1}]
# skip_vhdl_package_body --
#   Step through tokens looking for the value end and inspect the two tokens
#   that follow it.
#
# Arguments:
#   tokens   token list
#   index    position at which scanning starts
501 proc skip_vhdl_package_body {tokens index} {
503 while {$i < [
llength $tokens]} {
504 set current_tok [
lindex $tokens $i]
506 if {$current_val eq "end"} {
# expr joins its arguments before evaluating, so this tests i + 2 < llength.
507 if {[
expr {$i + 2} < [
llength $tokens]]} {
508 set next_token [
lindex $tokens [
expr {$i + 1}]]
509 set next_next_token [
lindex $tokens [
expr {$i + 2}]]
# Step onto the second token after end.
511 set i [
expr {$i + 2}]
# find_vhdl_constructs --
#   Walk a VHDL token list once, tracking library and use clauses and
#   appending a node to the local list results for the entities, packages,
#   package bodies and architectures it recognises.
#
# Arguments:
#   tokens     token list to scan
#   filename   stored as the file_path of every node created
521 proc find_vhdl_constructs {tokens filename} {
# Maps a lower-cased library name to the list of use paths recorded for it.
523 set libraries_map [dict create]
# Single pass; several branches advance i themselves.
525 for {
set i 0} {$i < [
llength $tokens]} {
incr i} {
526 set token [
lindex $tokens $i]
# library clause: every IDENTIFIER that follows is registered, lower-cased,
# with an empty use list unless the library is already known.
530 if {$type == "KEYWORD" && $value == "library"} {
531 set i [
expr {$i + 1}]
532 while {$i < [
llength $tokens]} {
533 set token [
lindex $tokens $i]
537 if {$type == "IDENTIFIER"} {
538 set lib_name [
string tolower $value]
539 if {![dict exists $libraries_map $lib_name]} {
540 dict set libraries_map $lib_name [list]
542 }
elseif {$type == "SEMICOLON"} {
547 }
elseif {$type == "KEYWORD" && $value == "use"} {
# use clause: the tokens after the keyword are scanned with index j.
548 if {$i + 1 < [
llength $tokens]} {
549 set use_path_start_idx [
expr {$i + 1}]
551 for {
set j $use_path_start_idx} {$j < [
llength $tokens]} {
incr j} {
552 set use_token [
lindex $tokens $j]
# The part of the path before the first dot is the library name.
559 if {$use_path != ""} {
560 set use_path_parts [
split $use_path .]
561 set lib_name [
string tolower [
lindex $use_path_parts 0]]
562 if {![dict exists $libraries_map $lib_name]} {
563 dict set libraries_map $lib_name [list]
# Record the full path under its library.
565 dict lappend libraries_map $lib_name $use_path
568 }
elseif {$type == "KEYWORD" && $value == "entity"} {
# entity: the two tokens after the keyword are fetched as name_tok and is_tok.
569 if {$i + 2 < [
llength $tokens]} {
570 set name_tok [
lindex $tokens [
expr {$i + 1}]]
571 set is_tok [
lindex $tokens [
expr {$i + 2}]]
# Convert the library map into a list of dicts with the keys name and uses.
574 set final_libraries [list]
575 dict for {lib_name use_paths} $libraries_map {
576 lappend final_libraries [dict create name $lib_name uses $use_paths]
579 lappend results $entity_node
582 }
elseif {$type == "KEYWORD" && $value == "package"} {
# package: the token after the keyword decides how the unit is handled.
583 if {[
expr {$i + 1} < [
llength $tokens]]} {
584 set next_token [
lindex $tokens [
expr {$i + 1}]]
586 if {[
token_type $next_token] == "IDENTIFIER"} {
# Skip over the keyword and the name.
589 set i [
expr {$i + 2}]
591 if {$i < [
llength $tokens]} {
592 set current_tok [
lindex $tokens $i]
594 if {[
token_type $current_tok] == "KEYWORD" && $current_val eq "is"} {
# Package specification node, with the libraries seen so far.
597 set final_libraries [list]
598 dict for {lib_name use_paths} $libraries_map {
599 lappend final_libraries [dict create name $lib_name uses $use_paths]
601 set package_node [
create_hdl_node "vhdl_package" $package_name $filename $package_line $final_libraries]
602 lappend results $package_node
# Package body node, with the libraries seen so far.
609 set final_libraries [list]
610 dict for {lib_name use_paths} $libraries_map {
611 lappend final_libraries [dict create name $lib_name uses $use_paths]
613 set package_body_node [
create_hdl_node "vhdl_package_body" $package_name $filename $package_line $final_libraries]
614 lappend results $package_body_node
618 }
elseif {$type == "KEYWORD" && $value == "architecture"} {
# architecture: the four tokens after the keyword are fetched as
# architecture name, of_tok, entity name and is_tok.
619 if {$i + 4 < [
llength $tokens]} {
620 set arch_name_tok [
lindex $tokens [
expr {$i + 1}]]
621 set of_tok [
lindex $tokens [
expr {$i + 2}]]
622 set entity_name_tok [
lindex $tokens [
expr {$i + 3}]]
623 set is_tok [
lindex $tokens [
expr {$i + 4}]]
# Continue from the index reported in arch_info and take over its results.
631 set i [dict get $arch_info index]
632 set components [dict get $arch_info components]
633 set insts [dict get $arch_info insts]
634 set final_libraries [list]
635 dict for {lib_name use_paths} $libraries_map {
636 lappend final_libraries [dict create name $lib_name uses $use_paths]
# The architecture node records the line of its name token and its entity.
639 set arch_node [
create_hdl_node "vhdl_architecture" $arch_name $filename [
token_line $arch_name_tok] $final_libraries $components $insts $entity_name]
640 lappend results $arch_node
# parse_hdl_file --
#   Read one HDL file, pick the Verilog or VHDL pipeline from its lower-cased
#   extension, run tokenizing and construct finding, and time both phases.
#
# Arguments:
#   filename   path of the file to analyse
649 proc parse_hdl_file {filename} {
# A missing file is reported on stdout.
651 if {![
file exists $filename]} {
652 puts "Error: file not found: $filename"
# NOTE(review): make sure fp is closed on every path.
656 set fp [open $filename r]
# The trimmed first line is compared against two protect markers.
662 set first_line [
lindex [
split $code "\n"] 0]
663 set first_line_trimmed [
string trim $first_line]
664 if {$first_line_trimmed eq "`pragma protect begin_protected" ||
665 $first_line_trimmed eq "`protect begin_protected"} {
# The extension is lower-cased before it is used to pick a pipeline.
669 set extension [
string tolower [
file extension $filename]]
674 switch -- $extension {
# time with count 1 runs the script once; its result starts with the
# elapsed microseconds. tokens and constructs are set as a side effect.
678 set t_tokenize [
time {set tokens [tokenize_verilog $code]} 1]
679 set t_constructs [
time {set constructs [find_verilog_constructs $tokens $filename]} 1]
683 set t_tokenize [
time {set tokens [tokenize_vhdl $code]} 1]
684 set t_constructs [
time {set constructs [find_vhdl_constructs $tokens $filename]} 1]
# The first word of each time result is the microsecond count; convert to ms.
692 set tokenize_us [
lindex $t_tokenize 0]
693 set constructs_us [
lindex $t_constructs 0]
694 set tokenize_ms [
expr {$tokenize_us / 1000.0}]
695 set constructs_ms [
expr {$constructs_us / 1000.0}]