# token_value --
#   Accessor for the text carried by a token.
#
# Arguments:
#   token - token dictionary that holds a "value" key
#
# Returns:
#   The entry stored under "value". dict get raises an error when the key
#   is absent.
proc token_value {token} {
    set field [dict get $token value]
    return $field
}
# token_type --
#   Accessor for the classification of a token.
#
# Arguments:
#   token - token dictionary that holds a "type" key
#
# Returns:
#   The entry stored under "type". dict get raises an error when the key
#   is absent.
proc token_type {token} {
    set kind [dict get $token type]
    return $kind
}
# token_line --
#   Accessor for the source line recorded on a token.
#
# Arguments:
#   token - token dictionary that holds a "line" key
#
# Returns:
#   The entry stored under "line". dict get raises an error when the key
#   is absent.
proc token_line {token} {
    set line_no [dict get $token line]
    return $line_no
}
# token_col --
#   Accessor for the column recorded on a token.
#
#   The tokenizer in this file stores the position under the key "column",
#   so that key is read first. The key "col", which this accessor used to
#   read exclusively, is still accepted as a fallback so that hand-built
#   token dictionaries using the short name keep working.
#
# Arguments:
#   token - token dictionary that holds a "column" key, or a "col" key
#
# Returns:
#   The stored column. dict get raises an error when neither key is present.
proc token_col {token} {
    if {[dict exists $token column]} {
        return [dict get $token column]
    }
    return [dict get $token col]
}
# tokenize --
#   Split code into token dictionaries using token_patterns, a list of
#   two-element items holding a token type and a pattern.
#
# Arguments:
#   code           - source text to scan
#   token_patterns - list of type/pattern pairs, iterated in list order
#   keywords       - list of words loaded into a lookup dictionary
#   case_sensitive - optional, default 1; when false the token text is
#                    lower-cased before the keyword lookup
#
# NOTE(review): only part of this procedure is visible in this excerpt.
# The initialisation of cursor, line, line_start, window_size, found_match,
# re and value, and the final return, are outside the visible lines; the
# comments below describe the visible statements only.
7 proc tokenize {code token_patterns keywords {case_sensitive 1}} {
# Build a dictionary keyed by keyword so membership tests are a dict lookup.
13 set keyword_dict [dict create]
14 foreach kw $keywords {
15 dict set keyword_dict $kw 1
18 set code_len [
string length $code]
19 while {$cursor < $code_len} {
# Matching is attempted on a window of the input that starts at the cursor.
26 while {!$found_match && $cursor < $code_len} {
# Clamp the window end to the last character index of the input.
27 set window_end [
expr {min($cursor + $window_size - 1, $code_len - 1)}]
28 set window [
string range $code $cursor $window_end]
# Each item is a list: element 0 is the token type, element 1 the pattern.
30 foreach item $token_patterns {
31 set type [
lindex $item 0]
32 set pattern [
lindex $item 1]
36 if {[regexp -- $re $window allmatch submatch]} {
38 set match_len [
string length $allmatch]
# One-based column of the match, measured from the current line start.
40 set column [
expr {$cursor - $line_start + 1}]
42 if {$type == "NEWLINE"} {
# The next line begins right after the matched newline text.
44 set line_start [
expr {$cursor + $match_len}]
45 }
elseif {$type != "WHITESPACE" && $type != "COMMENT"} {
# Lower-case the token text when the language is case-insensitive.
47 if {!$case_sensitive} {
48 set value [
string tolower $value]
# An IDENTIFIER found in the keyword dictionary is treated specially; the
# branch body is not visible here. Presumably it is retyped as KEYWORD,
# since callers test for that type - TODO confirm.
51 if {$type == "IDENTIFIER" && [dict exists $keyword_dict $value]} {
# Emit the token. The position is stored under the key column.
54 lappend tokens [dict create type $type value $value line $line column $column]
# Step over the matched text.
57 set cursor [
expr {$cursor + $match_len}]
# No pattern matched and the window already reaches the end of the input.
63 if {!$found_match && $window_end >= $code_len - 1} {
# Double the window size and log the retry.
68 set window_size [
expr {$window_size * 2}]
69 Msg Warning "No match found at index $cursor, increasing window size to $window_size"
# Report the character at which the tokenizer could not make progress.
74 puts stderr "Error: Tokenizer stuck at index $cursor on char '[
string index $code $cursor]'"
# create_hdl_node --
#   Build the dictionary that represents one HDL design unit.
#
# Arguments:
#   type                - node kind; values seen in this file include
#                         verilog_module, sv_package, vhdl_package,
#                         vhdl_package_body and vhdl_architecture
#   name                - name of the design unit
#   file_path           - file the unit was found in
#   line                - line number recorded for the unit
#   libraries           - optional, default empty string
#   components_declared - optional, default empty string
#   instantiations      - optional, default empty string
#   entity              - optional, default empty string
#
# Returns:
#   A dict with keys type, name, file_path, line, libraries,
#   components_declared, instantiations and entity.
110 proc create_hdl_node {type name file_path line {libraries ""} {components_declared ""} {instantiations ""} {entity ""}} {
111 return [dict create type $type name $name file_path $file_path line $line \
112 libraries $libraries components_declared $components_declared instantiations $instantiations entity $entity]
# create_instantiation_node --
#   Build the dictionary that represents one instantiation or reference.
#
# Arguments:
#   mod_name  - name of the instantiated or referenced unit
#   type      - kind of reference
#   inst_name - instance label
#   line      - line number recorded for the reference
#
# Returns:
#   A dict with keys mod_name, type, inst_name and line.
115 proc create_instantiation_node {mod_name type inst_name line} {
116 return [dict create mod_name $mod_name type $type inst_name $inst_name line $line]
# hdl_node_string --
#   Assemble a multi-line, human-readable description of an HDL node
#   dictionary in the variable node_info.
#
# Arguments:
#   hdl_node - node dict with at least type, name, file_path and line keys
#
# NOTE(review): several lines of this procedure, including the statement
# that hands node_info back to the caller, are outside the visible excerpt.
119 proc hdl_node_string {hdl_node} {
121 set node_info "Node Type: [dict get $hdl_node type]"
122 append node_info "\n Name: [dict get $hdl_node name]"
123 append node_info "\n File Path: [dict get $hdl_node file_path]"
124 append node_info "\n Declared on line: [dict get $hdl_node line]"
# The entity line is printed only when the key exists and is non-empty.
125 if {[dict exists $hdl_node entity] && [dict get $hdl_node entity] ne ""} {
126 append node_info "\n Entity: [dict get $hdl_node entity]"
# The library section is printed for the four VHDL node types only.
129 if {[dict get $hdl_node type] eq "vhdl_entity" || [dict get $hdl_node type] eq "vhdl_package" \
130 || [dict get $hdl_node type] eq "vhdl_architecture" || [dict get $hdl_node type] eq "vhdl_package_body"} {
131 append node_info "\n Libraries:"
132 set libraries [dict get $hdl_node libraries]
133 if {[
llength $libraries] == 0} {
134 append node_info " (None)"
# Each library entry is a dict with the keys name and uses.
136 foreach lib $libraries {
137 append node_info "\n - Library: [dict get $lib name]"
138 set uses [dict get $lib uses]
139 if {[
llength $uses] > 0} {
140 append node_info "\n Uses:"
142 append node_info "\n - $use"
# Architectures additionally list declared components and instantiations.
148 if {[dict get $hdl_node type] eq "vhdl_architecture"} {
149 append node_info "\n Components Declared:"
150 set components_declared [dict get $hdl_node components_declared]
151 if {[
llength $components_declared] == 0} {
152 append node_info " (None)"
# Each component entry is a dict with the keys name and line.
154 foreach comp $components_declared {
155 append node_info "\n - [dict get $comp name] (line [dict get $comp line])"
158 append node_info "\n Instantiations:"
159 set instantiations [dict get $hdl_node instantiations]
160 if {[
llength $instantiations] == 0} {
161 append node_info " (None)"
# Each instantiation entry is a dict with mod_name, inst_name, type and line.
163 foreach inst $instantiations {
165 append node_info "\n - Name: [dict get $inst mod_name], Instance: [dict get $inst inst_name] type: [dict get $inst type] (line [dict get $inst line])"
169 }
elseif {[dict get $hdl_node type] eq "verilog_module"} {
# Verilog modules list instantiations only, in the same format as above.
170 append node_info "\n Instantiations:"
171 set instantiations [dict get $hdl_node instantiations]
172 if {[
llength $instantiations] == 0} {
173 append node_info " (None)"
175 foreach inst $instantiations {
177 append node_info "\n - Name: [dict get $inst mod_name], Instance: [dict get $inst inst_name] type: [dict get $inst type] (line [dict get $inst line])"
# verilog_keywords --
#   Reserved words handed to tokenize by tokenize_verilog. An IDENTIFIER
#   whose text appears in this list is looked up in the keyword dictionary
#   built by tokenize.
#
#   The entry "unsigned" was previously misspelled "unsigned1", so the real
#   reserved word was never recognised as a keyword.
#
#   The list continues past the last line shown here; its terminator is
#   outside this block and is left untouched.
set verilog_keywords {
  always and assign automatic
  begin buf bufif0 bufif1 case casex
  casez cell cmos config
  deassign default defparam design disable
  edge else end endcase endconfig endfunction endgenerate
  endmodule endpackage endprimitive endspecify endtable endtask event
  for force forever fork function generate genvar highz0
  highz1 if ifnone incdir include initial inout input instance integer
  join large liblist library localparam macromodule medium module nand negedge
  nmos nor noshowcancelled not notif0 notif1 or output package parameter pmos
  posedge primitive pull0 pull1 pulldown pullup pulsestyle_onevent pulsestyle_ondetect rcmos real
  realtime reg release repeat rnmos rpmos rtran rtranif0 rtranif1 scalared
  showcancelled signed small specify specparam strong0 strong1 supply0 supply1 table
  task time tran tranif0 tranif1 tri tri0 tri1 triand trior
  trireg unsigned use uwire vectored wait wand weak0 weak1 while
# verilog_token_patterns --
#   List of two-element items, token type then regular expression, that
#   tokenize_verilog passes to tokenize. tokenize walks the list in order.
#   Visible entries cover comments, horizontal whitespace, numbers,
#   identifiers, operator runs and backtick directives.
# NOTE(review): some entries and the list terminator are outside the
# visible excerpt.
211 set verilog_token_patterns {
212 {COMMENT {//[^\n]*|/\*.*?\*/}}
214 {WHITESPACE {[ \t\r]+}}
216 {NUMBER {\d['\d_hHbsodxza-fA-F]*}}
217 {IDENTIFIER {[a-zA-Z_][a-zA-Z0-9_$]*}}
218 {OPERATOR {[+\-*/%<>=!&|~^?@:]+}}
229 {DIRECTIVE {\`[a-zA-Z_][a-zA-Z0-9_]*}}
# tokenize_verilog --
#   Tokenize Verilog source by calling tokenize with the global Verilog
#   pattern and keyword tables and case_sensitive set to 1.
#
# Arguments:
#   code - Verilog source text
#
# Returns:
#   Whatever tokenize returns for these arguments.
233 proc tokenize_verilog {code} {
234 global verilog_token_patterns verilog_keywords
235 return [
tokenize $code $verilog_token_patterns $verilog_keywords 1]
# find_verilog_constructs --
#   Walk a Verilog token list with a small state machine and collect
#   module and package nodes, built with create_hdl_node, in results.
#   The states seen in the visible code are TOP_LEVEL, IN_MODULE_HEADER,
#   IN_MODULE_BODY and IN_PACKAGE_BODY.
#
# Arguments:
#   tokens   - list of token dictionaries
#   filename - path stored in every node that is created
#
# NOTE(review): many lines are outside the visible excerpt, including the
# code that derives type and value from the current token, the creation of
# import_node, inc_node and inst_dict, the initialisation of results and
# the final return. Comments below describe the visible statements only.
238 proc find_verilog_constructs {tokens filename} {
# State of the scan plus the per-scope accumulators.
240 set state "TOP_LEVEL"
241 set current_module ""
242 set current_module_insts [list]
243 set current_package ""
244 set current_package_insts [list]
245 set package_start_line 0
246 set sv_pending_imports [list]
248 for {
set i 0} {$i < [
llength $tokens]} {
incr i} {
249 set token [
lindex $tokens $i]
# The word import is matched here as an IDENTIFIER token; at least two
# more tokens must follow it.
255 if {$type == "IDENTIFIER" && $value == "import"} {
256 if {$i + 2 < [
llength $tokens]} {
257 set t1 [
lindex $tokens [
expr {$i + 1}]]
258 set t2 [
lindex $tokens [
expr {$i + 2}]]
# Attach the import to the open scope. At top level it is parked in
# sv_pending_imports, which seeds the next module or package.
263 if {$state == "TOP_LEVEL"} {
264 lappend sv_pending_imports $import_node
265 }
elseif {$state == "IN_MODULE_HEADER" || $state == "IN_MODULE_BODY"} {
266 lappend current_module_insts $import_node
267 }
elseif {$state == "IN_PACKAGE_BODY"} {
268 lappend current_package_insts $import_node
# Include directive: the file name comes from the token that follows.
277 if {$type == "DIRECTIVE" && $value == "`include"} {
279 if {$i + 1 < [
llength $tokens]} {
280 set t1 [
lindex $tokens [
expr {$i + 1}]]
# Strip double-quote characters from both ends of the token text.
282 set include_file [
string trim [
token_value $t1] "\""]
286 while {$i + 1 < [
llength $tokens]} {
288 set t [
lindex $tokens $i]
296 if {$include_file ne ""} {
# Includes are routed to the same scope lists as imports.
299 if {$state == "TOP_LEVEL"} {
300 lappend sv_pending_imports $inc_node
301 }
elseif {$state == "IN_MODULE_HEADER" || $state == "IN_MODULE_BODY"} {
302 lappend current_module_insts $inc_node
303 }
elseif {$state == "IN_PACKAGE_BODY"} {
304 lappend current_package_insts $inc_node
# Top level: look for the start of a module or package declaration.
310 if {$state == "TOP_LEVEL"} {
311 if {$type == "KEYWORD" && $value == "module"} {
312 if {$i + 1 < [
llength $tokens]} {
313 set next_token [
lindex $tokens [
expr {$i + 1}]]
314 if {[
token_type $next_token] == "IDENTIFIER"} {
# Entering a module: parked imports become its initial reference list.
316 set state "IN_MODULE_HEADER"
317 set current_module_insts $sv_pending_imports
318 set sv_pending_imports [list]
321 }
elseif {$type == "KEYWORD" && $value == "package"} {
324 if {$i + 1 < [
llength $tokens]} {
325 set next_token [
lindex $tokens [
expr {$i + 1}]]
326 if {[
token_type $next_token] == "IDENTIFIER"} {
# Entering a package: parked imports become its initial reference list.
329 set state "IN_PACKAGE_BODY"
330 set current_package_insts $sv_pending_imports
331 set sv_pending_imports [list]
335 }
elseif {$state == "IN_MODULE_HEADER"} {
# A SEMICOLON token moves the scan from the module header to its body.
336 if {$type == "SEMICOLON"} {
337 set state "IN_MODULE_BODY"
339 }
elseif {$state == "IN_MODULE_BODY"} {
340 if {$type == "KEYWORD" && $value == "endmodule"} {
# Emit the module node. NOTE(review): the line stored is read from the
# current token, whereas the package branch stores package_start_line;
# confirm that the module line is meant to be taken from this token.
342 set decl_node [
create_hdl_node "verilog_module" $current_module $filename [dict get $token line] "" "" $current_module_insts]
343 lappend results $decl_node
345 set state "TOP_LEVEL"
346 set current_module ""
# An IDENTIFIER with at least two following tokens is examined as a
# possible instantiation; the deciding conditions are not visible here.
350 if {$type == "IDENTIFIER"} {
351 if {$i + 2 < [
llength $tokens]} {
352 set token2 [
lindex $tokens [
expr {$i + 1}]]
353 set token3 [
lindex $tokens [
expr {$i + 2}]]
357 lappend current_module_insts $inst_dict
363 while {$i < [
llength $tokens]} {
364 set token [
lindex $tokens $i]
379 set token [
lindex $tokens $i]
382 lappend current_module_insts $inst_dict
387 }
elseif {$state == "IN_PACKAGE_BODY"} {
# endpackage closes the package: emit its node and reset the package state.
388 if {$type == "KEYWORD" && $value == "endpackage"} {
389 set decl_node [
create_hdl_node "sv_package" $current_package $filename \
390 $package_start_line "" "" $current_package_insts]
391 lappend results $decl_node
392 set state "TOP_LEVEL"
393 set current_package ""
394 set current_package_insts [list]
405 abs access after alias all and architecture array assert attribute
406 begin block body buffer bus case component configuration constant
407 disconnect downto else elsif end entity exit file for function
408 generate generic group guarded if impure in inertial inout is
409 label library linkage literal loop map mod nand new next nor not
410 null of on open or others out package port postponed procedure
411 process pure range record register reject rem report return rol
412 ror select severity signal shared sla sll sra srl subtype then
413 to transport type unaffected units until use variable wait when
# vhdl_token_patterns --
#   List of two-element items, token type then regular expression, that
#   tokenize_vhdl passes to tokenize. Visible entries cover horizontal
#   whitespace, single-character literals, numbers, identifiers and
#   single-character operators.
# NOTE(review): some entries and the list terminator are outside the
# visible excerpt.
417 set vhdl_token_patterns {
420 {WHITESPACE {[ \t\r]+}}
422 {CHAR_LITERAL {'[^']'}}
423 {NUMBER {\d['\d_.]*}}
424 {IDENTIFIER {[a-zA-Z][a-zA-Z0-9_]*}}
425 {OPERATOR {[:=<>|/*&.+-]}}
# tokenize_vhdl --
#   Tokenize VHDL source by calling tokenize with the global VHDL pattern
#   and keyword tables and case_sensitive set to 0, so tokenize takes its
#   lower-casing branch.
#
# Arguments:
#   code - VHDL source text
#
# Returns:
#   Whatever tokenize returns for these arguments.
433 proc tokenize_vhdl {code} {
434 global vhdl_token_patterns vhdl_keywords
435 return [
tokenize $code $vhdl_token_patterns $vhdl_keywords 0]
# parse_vhdl_architecture_header --
#   Scan tokens from index and collect component declarations and package
#   instantiations.
#
# Arguments:
#   tokens - list of token dictionaries
#   index  - position at which scanning starts
#
# Returns:
#   A dict with keys index, components and pkg_insts.
#
# NOTE(review): the derivation of type and value, the body of the begin
# branch and the code that fills architecture_pkg_insts are outside the
# visible excerpt.
439 proc parse_vhdl_architecture_header {tokens index} {
440 set architecture_components [list]
441 set architecture_pkg_insts [list]
443 for {
set i $index} {$i < [
llength $tokens]} {
incr i} {
444 set token [
lindex $tokens $i]
# The begin keyword is tested first; presumably it ends the scan, since
# the caller expects begin at the returned index - TODO confirm.
448 if {$type == "KEYWORD" && $value == "begin"} {
# Component declaration: the name token and the one after it are inspected.
453 if {$type == "KEYWORD" && $value == "component"} {
454 if {[
expr {$i + 2}] < [
llength $tokens]} {
455 set comp_name_tok [
lindex $tokens [
expr {$i + 1}]]
456 set is_tok [
lindex $tokens [
expr {$i + 2}]]
458 if {[
token_type $comp_name_tok] == "IDENTIFIER" &&
460 lappend architecture_components [dict create name [
token_value $comp_name_tok] line [
token_line $comp_name_tok]]
# Package keyword inside the header: the six tokens that follow are read
# as name, is, new, library, dot and package.
466 if {$type == "KEYWORD" && $value == "package"} {
467 if {$i + 6 < [
llength $tokens]} {
468 set name_tok [
lindex $tokens [
expr {$i + 1}]]
469 set is_tok [
lindex $tokens [
expr {$i + 2}]]
470 set new_tok [
lindex $tokens [
expr {$i + 3}]]
471 set lib_tok [
lindex $tokens [
expr {$i + 4}]]
472 set dot_tok [
lindex $tokens [
expr {$i + 5}]]
473 set pkg_tok [
lindex $tokens [
expr {$i + 6}]]
# Advance i to the next SEMICOLON token or to the end of the list.
484 while {$i < [
llength $tokens] && \
485 [
token_type [
lindex $tokens $i]] != "SEMICOLON"} {
incr i}
490 return [dict create index $i components $architecture_components \
491 pkg_insts $architecture_pkg_insts]
# parse_vhdl_architecture_body --
#   Scan tokens from index and collect instantiations in
#   architecture_insts.
#
# Arguments:
#   tokens    - list of token dictionaries
#   index     - position at which scanning starts
#   arch_name - architecture name; its use is not visible in this excerpt
#
# Returns:
#   A dict with keys index and insts.
#
# NOTE(review): the derivation of type, value, check_type and check_value,
# the conditions applied to token2, token3 and token4, and the code that
# appends to architecture_insts are outside the visible excerpt.
494 proc parse_vhdl_architecture_body {tokens index arch_name} {
495 set architecture_insts [list]
497 for {
set i $index} {$i < [
llength $tokens]} {
incr i} {
498 set token [
lindex $tokens $i]
# The end keyword: the two following tokens are fetched for inspection.
502 if {$type == "KEYWORD" && $value == "end"} {
503 if {$i + 2 < [
llength $tokens]} {
504 set token2 [
lindex $tokens [
expr {$i + 1}]]
505 set token3 [
lindex $tokens [
expr {$i + 2}]]
# An IDENTIFIER with two following tokens is examined as a possible
# instantiation statement.
513 if {$type == "IDENTIFIER"} {
514 if {$i + 2 < [
llength $tokens]} {
516 set token2 [
lindex $tokens [
expr {$i + 1}]]
517 set token3 [
lindex $tokens [
expr {$i + 2}]]
# Build the instantiated unit name by concatenating token values; the
# test below singles out generic, port and SEMICOLON tokens.
521 set entity_inst_name ""
522 while {$i < [
llength $tokens]} {
523 set token [
lindex $tokens $i]
527 if {$value == "generic" || $value == "port" || $type == "SEMICOLON"} {
530 set entity_inst_name "${entity_inst_name}${value}"
533 if {$entity_inst_name != ""} {
538 set token4 [
lindex $tokens [
expr {$i + 3}]]
# Look ahead from i + 3 to decide whether this statement instantiates
# something; a port or generic keyword is the visible trigger.
544 set is_instantiation 0
546 for {
set k [
expr {$i + 3}]} {$k < [
llength $tokens]} {
incr k} {
547 set check_token [
lindex $tokens $k]
551 if {$check_type == "SEMICOLON"} {
554 if {$check_type == "KEYWORD" && ($check_value eq "port" || $check_value eq "generic")} {
555 if {[
expr {$k + 1}] < [
llength $tokens]} {
556 set next_token [
lindex $tokens [
expr {$k + 1}]]
558 set is_instantiation 1
565 if {$is_instantiation} {
573 return [dict create index $i insts $architecture_insts]
# parse_vhdl_architecture_content --
#   Combine the results of the header scan and the body scan of one
#   architecture.
#
# Arguments:
#   arch   - not used in the visible lines
#   tokens - list of token dictionaries
#   index  - not used in the visible lines
#
# Returns:
#   A dict with keys index, components and insts.
#
# NOTE(review): the statements that produce header_info and body_info are
# outside the visible excerpt; presumably they call
# parse_vhdl_architecture_header and parse_vhdl_architecture_body - confirm.
576 proc parse_vhdl_architecture_content {arch tokens index} {
580 set i [dict get $header_info index]
581 set architecture_components [dict get $header_info components]
582 set architecture_pkg_insts [dict get $header_info pkg_insts]
# The body is handled only when the token at i has the value begin.
584 if {$i < [
llength $tokens] && [
token_value [
lindex $tokens $i]] eq "begin"} {
588 set i [dict get $body_info index]
# Header package instantiations come first, followed by body instantiations.
589 set architecture_insts [
concat $architecture_pkg_insts [dict get $body_info insts]]
591 return [dict create index $i components $architecture_components insts $architecture_insts]
# skip_vhdl_package_spec --
#   Walk the token list looking for a token whose value is end, then
#   inspect the token after it.
#
# Arguments:
#   tokens - list of token dictionaries
#   index  - presumably the starting position; the statement that
#            initialises i is not visible - TODO confirm
#
# NOTE(review): the derivation of current_val, the test applied to
# next_token, the loop increment and the return value are outside the
# visible excerpt.
594 proc skip_vhdl_package_spec {tokens index} {
596 while {$i < [
llength $tokens]} {
597 set current_tok [
lindex $tokens $i]
599 if {$current_val eq "end"} {
# One token of look-ahead is required after end.
600 if {[
expr {$i + 1} < [
llength $tokens]]} {
601 set next_token [
lindex $tokens [
expr {$i + 1}]]
# Move i forward by one token.
603 set i [
expr {$i + 1}]
# skip_vhdl_package_body --
#   Walk the token list looking for a token whose value is end, then
#   inspect the two tokens after it.
#
# Arguments:
#   tokens - list of token dictionaries
#   index  - presumably the starting position; the statement that
#            initialises i is not visible - TODO confirm
#
# NOTE(review): the derivation of current_val, the tests applied to
# next_token and next_next_token, the loop increment and the return value
# are outside the visible excerpt.
613 proc skip_vhdl_package_body {tokens index} {
615 while {$i < [
llength $tokens]} {
616 set current_tok [
lindex $tokens $i]
618 if {$current_val eq "end"} {
# Two tokens of look-ahead are required after end.
619 if {[
expr {$i + 2} < [
llength $tokens]]} {
620 set next_token [
lindex $tokens [
expr {$i + 1}]]
621 set next_next_token [
lindex $tokens [
expr {$i + 2}]]
# Move i forward by two tokens.
623 set i [
expr {$i + 2}]
# find_vhdl_constructs --
#   Walk a VHDL token list and collect nodes for entities, packages,
#   package bodies and architectures in results, together with the
#   library and use clauses seen so far.
#
# Arguments:
#   tokens   - list of token dictionaries
#   filename - path stored in every node that is created
#
# NOTE(review): many lines are outside the visible excerpt, including the
# derivation of type and value, the construction of use_path, entity_node,
# package_name, package_line, ref_pkg, arch_name, entity_name and
# arch_info, the initialisation of results and the final return.
633 proc find_vhdl_constructs {tokens filename} {
# Maps a lower-cased library name to the list of use paths recorded for it.
635 set libraries_map [dict create]
637 for {
set i 0} {$i < [
llength $tokens]} {
incr i} {
638 set token [
lindex $tokens $i]
# Library clause: scan the tokens that follow the keyword.
642 if {$type == "KEYWORD" && $value == "library"} {
643 set i [
expr {$i + 1}]
644 while {$i < [
llength $tokens]} {
645 set token [
lindex $tokens $i]
649 if {$type == "IDENTIFIER"} {
# Register the library with an empty use list unless it is already known.
650 set lib_name [
string tolower $value]
651 if {![dict exists $libraries_map $lib_name]} {
652 dict set libraries_map $lib_name [list]
654 }
elseif {$type == "SEMICOLON"} {
659 }
elseif {$type == "KEYWORD" && $value == "use"} {
# Use clause: the path is read from the tokens after the keyword.
660 if {$i + 1 < [
llength $tokens]} {
661 set use_path_start_idx [
expr {$i + 1}]
663 for {
set j $use_path_start_idx} {$j < [
llength $tokens]} {
incr j} {
664 set use_token [
lindex $tokens $j]
671 if {$use_path != ""} {
# The first dot-separated component of the path names the library.
672 set use_path_parts [
split $use_path .]
673 set lib_name [
string tolower [
lindex $use_path_parts 0]]
674 if {![dict exists $libraries_map $lib_name]} {
675 dict set libraries_map $lib_name [list]
677 dict lappend libraries_map $lib_name $use_path
680 }
elseif {$type == "KEYWORD" && $value == "entity"} {
# Entity declaration: the two following tokens are fetched for inspection.
681 if {$i + 2 < [
llength $tokens]} {
682 set name_tok [
lindex $tokens [
expr {$i + 1}]]
683 set is_tok [
lindex $tokens [
expr {$i + 2}]]
# Convert the library map into a list of dicts with keys name and uses.
686 set final_libraries [list]
687 dict for {lib_name use_paths} $libraries_map {
688 lappend final_libraries [dict create name $lib_name uses $use_paths]
691 lappend results $entity_node
694 }
elseif {$type == "KEYWORD" && $value == "package"} {
# Package keyword: an IDENTIFIER must follow for the branch to continue.
695 if {[
expr {$i + 1} < [
llength $tokens]]} {
696 set next_token [
lindex $tokens [
expr {$i + 1}]]
698 if {[
token_type $next_token] == "IDENTIFIER"} {
701 set i [
expr {$i + 2}]
703 if {$i < [
llength $tokens]} {
704 set current_tok [
lindex $tokens $i]
706 if {[
token_type $current_tok] == "KEYWORD" && $current_val eq "is"} {
# The token after is has the value new: package instantiation form.
709 if {$i + 1 < [
llength $tokens] &&
710 [
token_value [
lindex $tokens [
expr {$i + 1}]]] eq "new"} {
715 if {$i + 4 < [
llength $tokens]} {
716 set lib_tok [
lindex $tokens [
expr {$i + 2}]]
717 set dot_tok [
lindex $tokens [
expr {$i + 3}]]
718 set pkg_tok [
lindex $tokens [
expr {$i + 4}]]
# Record the referenced library and add a use path of the form
# library.package.all for it.
722 set ref_lib [
string tolower [
token_value $lib_tok]]
724 if {![dict exists $libraries_map $ref_lib]} {
725 dict set libraries_map $ref_lib [list]
727 dict lappend libraries_map $ref_lib "${ref_lib}.${ref_pkg}.all"
# Advance i to the next SEMICOLON token or to the end of the list.
731 while {$i < [
llength $tokens] && \
732 [
token_type [
lindex $tokens $i]] != "SEMICOLON"} {
incr i}
# Emit a vhdl_package node carrying the libraries collected so far.
737 set final_libraries [list]
738 dict for {lib_name use_paths} $libraries_map {
739 lappend final_libraries [dict create name $lib_name uses $use_paths]
741 set package_node [
create_hdl_node "vhdl_package" $package_name $filename $package_line $final_libraries]
742 lappend results $package_node
# Emit a vhdl_package_body node carrying the libraries collected so far.
749 set final_libraries [list]
750 dict for {lib_name use_paths} $libraries_map {
751 lappend final_libraries [dict create name $lib_name uses $use_paths]
753 set package_body_node [
create_hdl_node "vhdl_package_body" $package_name $filename $package_line $final_libraries]
754 lappend results $package_body_node
758 }
elseif {$type == "KEYWORD" && $value == "architecture"} {
# Architecture: the four following tokens are read as the architecture
# name, of, the entity name and is.
759 if {$i + 4 < [
llength $tokens]} {
760 set arch_name_tok [
lindex $tokens [
expr {$i + 1}]]
761 set of_tok [
lindex $tokens [
expr {$i + 2}]]
762 set entity_name_tok [
lindex $tokens [
expr {$i + 3}]]
763 set is_tok [
lindex $tokens [
expr {$i + 4}]]
# Continue from the index reported in arch_info and pick up its results.
771 set i [dict get $arch_info index]
772 set components [dict get $arch_info components]
773 set insts [dict get $arch_info insts]
774 set final_libraries [list]
775 dict for {lib_name use_paths} $libraries_map {
776 lappend final_libraries [dict create name $lib_name uses $use_paths]
# The node line is the line of the architecture name token.
779 set arch_node [
create_hdl_node "vhdl_architecture" $arch_name $filename [
token_line $arch_name_tok] $final_libraries $components $insts $entity_name]
780 lappend results $arch_node
# parse_hdl_file --
#   Read an HDL file, choose the Verilog or VHDL pipeline from the
#   lower-cased file extension, and run the tokenizer followed by the
#   construct finder.
#
#   Each stage runs inside the time command with a count of 1. The first
#   element of each timing result is taken as microseconds and divided by
#   1000.0 to get milliseconds.
#
# Arguments:
#   filename - path of the HDL file to parse
#
# NOTE(review): the code that reads code from fp, the switch patterns, the
# handling of protected files and the return value are outside the visible
# excerpt. No close of fp is visible either - confirm the channel is
# closed on every path.
789 proc parse_hdl_file {filename} {
# A missing file is reported on standard output.
791 if {![
file exists $filename]} {
792 puts "Error: file not found: $filename"
796 set fp [open $filename r]
# The first line, trimmed, is compared with two protect-begin pragmas.
802 set first_line [
lindex [
split $code "\n"] 0]
803 set first_line_trimmed [
string trim $first_line]
804 if {$first_line_trimmed eq "`pragma protect begin_protected" ||
805 $first_line_trimmed eq "`protect begin_protected"} {
# The extension is lower-cased before it selects the pipeline.
809 set extension [
string tolower [
file extension $filename]]
814 switch -- $extension {
819 set t_tokenize [
time {set tokens [tokenize_verilog $code]} 1]
820 set t_constructs [
time {set constructs [find_verilog_constructs $tokens $filename]} 1]
824 set t_tokenize [
time {set tokens [tokenize_vhdl $code]} 1]
825 set t_constructs [
time {set constructs [find_vhdl_constructs $tokens $filename]} 1]
833 set tokenize_us [
lindex $t_tokenize 0]
834 set constructs_us [
lindex $t_constructs 0]
835 set tokenize_ms [
expr {$tokenize_us / 1000.0}]
836 set constructs_ms [
expr {$constructs_us / 1000.0}]