Base Parser Infrastructure.
This module defines the foundational abstractions for the parsing subsystem.
It establishes the LanguageParser base class, the ParseResult container,
and the unified Extractor protocol used to implement language-specific logic.
Classes
BaseExtractor
Bases: ABC
Abstract base class for extractors.
Provides a standard inheritance base for implementing the Extractor protocol.
Source code in src/jnkn/parsing/base.py
class BaseExtractor(ABC):
    """
    Inheritance-based starting point for implementing the Extractor protocol.

    Every member declared here is abstract: a concrete subclass has to
    supply the `name` and `priority` properties as well as the
    `can_extract` and `extract` methods.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Identifier that distinguishes this extractor from the others."""

    @property
    @abstractmethod
    def priority(self) -> int:
        """Ordering weight (0-100); extractors with higher values run first."""

    @abstractmethod
    def can_extract(self, ctx: ExtractionContext) -> bool:
        """Cheap relevance check for the given context."""

    @abstractmethod
    def extract(self, ctx: ExtractionContext) -> Generator[Union[Node, Edge], None, None]:
        """Yield the nodes and edges extracted for the given context."""
name
abstractmethod
property
Unique identifier for this extractor.
priority
abstractmethod
property
Higher priority extractors run first (0-100).
can_extract(ctx)
abstractmethod
Quick check if this extractor is relevant.
Source code in src/jnkn/parsing/base.py
@abstractmethod
def can_extract(self, ctx: ExtractionContext) -> bool:
    """Cheap relevance check: does this extractor apply to `ctx`?"""
extract(ctx)
abstractmethod
Extract artifacts and yield nodes/edges.
Source code in src/jnkn/parsing/base.py
@abstractmethod
def extract(self, ctx: ExtractionContext) -> Generator[Union[Node, Edge], None, None]:
    """Yield the nodes and edges extracted for the given context."""
CompositeParser
Bases: LanguageParser
A parser that delegates to multiple sub-parsers.
Useful for handling directories or mixed-content scenarios.
Source code in src/jnkn/parsing/base.py
class CompositeParser(LanguageParser):
    """
    Parser that fans a file out to a list of sub-parsers.

    Every sub-parser that accepts the file contributes its results, which
    is useful for directories or mixed-content scenarios.
    """

    def __init__(self, context: ParserContext, parsers: List[LanguageParser]):
        super().__init__(context)
        # Sub-parsers are consulted in the order supplied here.
        self.parsers = parsers

    @property
    def name(self) -> str:
        return "composite"

    def can_parse(self, file_path: Path, content: bytes | None = None) -> bool:
        """Return True as soon as one sub-parser accepts the file."""
        for candidate in self.parsers:
            if candidate.can_parse(file_path, content):
                return True
        return False

    def parse(self, file_path: Path, content: bytes) -> List[Union[Node, Edge]]:
        """Concatenate the output of each sub-parser that accepts the file."""
        # Lazy filter: each sub-parser is checked immediately before it is run.
        applicable = (sub for sub in self.parsers if sub.can_parse(file_path, content))
        collected = []
        for sub in applicable:
            collected += sub.parse(file_path, content)
        return collected
ExtractionContext
dataclass
Context object passed to Extractors during processing.
Provides access to the file content, path, and shared state (like deduplication sets)
needed by individual extractor implementations.
Source code in src/jnkn/parsing/base.py
@dataclass
class ExtractionContext:
    """
    Per-file bundle handed to each Extractor during processing.

    Carries the file's path, identifier and text, an optional parse tree,
    and shared state (such as the `seen_ids` deduplication set) that
    individual extractor implementations need.
    """

    file_path: Path
    file_id: str
    text: str
    # Tree-sitter AST object; None when no tree is supplied.
    tree: Any | None = None
    # Built by default_factory, so every context starts with its own empty set.
    seen_ids: Set[str] = field(default_factory=set)
Extractor
Bases: Protocol
Protocol for implementing modular extraction logic.
Extractors are specialized components (e.g., 'EnvVarExtractor', 'ImportExtractor')
that focus on finding specific patterns within a source file.
Source code in src/jnkn/parsing/base.py
class Extractor(Protocol):
    """
    Structural interface for modular extraction components.

    An extractor (e.g., 'EnvVarExtractor', 'ImportExtractor') is a
    specialized component that looks for one specific kind of pattern
    within a source file.
    """

    @property
    def name(self) -> str:
        """Identifier of the extractor, used for debugging."""
        ...

    @property
    def priority(self) -> int:
        """Ordering weight (0-100); extractors with higher values run first."""
        ...

    def can_extract(self, ctx: ExtractionContext) -> bool:
        """Report whether this extractor applies to the current context."""
        ...

    def extract(self, ctx: ExtractionContext) -> Generator[Union[Node, Edge], None, None]:
        """Yield the Nodes and Edges found in the source text."""
        ...
name
property
Unique identifier for the extractor (for debugging).
priority
property
Execution priority (0-100). Higher runs first.
can_extract(ctx)
Determine if this extractor applies to the current context.
Source code in src/jnkn/parsing/base.py
def can_extract(self, ctx: ExtractionContext) -> bool:
    """Report whether this extractor applies to the current context."""
    ...
extract(ctx)
Yield Nodes and Edges found in the source text.
Source code in src/jnkn/parsing/base.py
def extract(self, ctx: ExtractionContext) -> Generator[Union[Node, Edge], None, None]:
    """Yield the Nodes and Edges found in the source text."""
    ...
ExtractorRegistry
Registry for managing and executing a collection of Extractors.
Source code in src/jnkn/parsing/base.py
class ExtractorRegistry:
    """
    Registry for managing and executing a collection of Extractors.

    Extractors are kept sorted by descending `priority`, so `extract_all`
    runs the highest-priority extractor first.
    """

    def __init__(self):
        self._extractors: List[Extractor] = []

    def register(self, extractor: Extractor) -> None:
        """
        Register an extractor and maintain priority sort order.

        The sort is stable, so extractors with equal priority keep their
        registration order.
        """
        self._extractors.append(extractor)
        self._extractors.sort(key=lambda e: -e.priority)

    def extract_all(self, ctx: ExtractionContext) -> Generator[Union[Node, Edge], None, None]:
        """
        Execute all registered extractors against the provided context.

        Failures in individual extractors are logged but do not halt the
        process. This holds for the `can_extract` check as well as for
        `extract` itself; items yielded before a failure are kept.
        """
        for extractor in self._extractors:
            try:
                # The relevance check sits inside the guard as well: an
                # extractor whose can_extract raises must not abort the
                # extractors that come after it.
                if extractor.can_extract(ctx):
                    yield from extractor.extract(ctx)
            except Exception as e:
                # Lazy %-args: the message is only formatted if it is emitted.
                logger.debug("Extractor %s failed on %s: %s", extractor.name, ctx.file_path, e)
extract_all(ctx)
Execute all registered extractors against the provided context.
Failures in individual extractors are logged but do not halt the process.
Source code in src/jnkn/parsing/base.py
def extract_all(self, ctx: ExtractionContext) -> Generator[Union[Node, Edge], None, None]:
    """
    Execute all registered extractors against the provided context.

    Failures in individual extractors are logged but do not halt the
    process. This holds for the `can_extract` check as well as for
    `extract` itself; items yielded before a failure are kept.
    """
    for extractor in self._extractors:
        try:
            # The relevance check sits inside the guard as well: an
            # extractor whose can_extract raises must not abort the
            # extractors that come after it.
            if extractor.can_extract(ctx):
                yield from extractor.extract(ctx)
        except Exception as e:
            # Lazy %-args: the message is only formatted if it is emitted.
            logger.debug("Extractor %s failed on %s: %s", extractor.name, ctx.file_path, e)
register(extractor)
Register an extractor and maintain priority sort order.
Source code in src/jnkn/parsing/base.py
def register(self, extractor: Extractor) -> None:
    """Add `extractor`, then re-sort so that higher priorities come first."""
    pool = self._extractors
    pool.append(extractor)
    # Negated key gives descending order; the sort is stable, so equal
    # priorities keep their registration order.
    pool.sort(key=lambda item: -item.priority)
LanguageParser
Bases: IParser, ABC
Abstract Base Class for language-specific parsers.
Implementations must define supported extensions and the parsing logic.
Source code in src/jnkn/parsing/base.py
class LanguageParser(IParser, ABC):
    """
    Abstract base for language-specific parsers.

    Concrete parsers provide `name`, `can_parse` and `parse`; `extensions`
    may be overridden to advertise supported file extensions.
    """

    def __init__(self, context: ParserContext | None = None):
        # Fall back to a default-constructed context when none is supplied.
        self.context = context if context else ParserContext()
        # One logger per concrete parser class.
        self._logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")

    @property
    @abstractmethod
    def name(self) -> str:
        """The unique name of the language (e.g., 'python')."""

    @property
    def extensions(self) -> List[str]:
        """File extensions supported by this parser; empty unless overridden."""
        return []

    @abstractmethod
    def can_parse(self, file_path: Path, content: bytes | None = None) -> bool:
        """
        Decide whether this parser can handle the file.

        Args:
            file_path: The path to the file.
            content: Optional file content for heuristic detection.
        """

    @abstractmethod
    def parse(self, file_path: Path, content: bytes) -> List[Union[Node, Edge]]:
        """Parse the file and return its Nodes and Edges as one list."""

    def parse_full(self, file_path: Path, content: bytes | None = None) -> ParseResult:
        """
        Run `parse` and package the outcome as a ParseResult.

        The file is read from disk when `content` is None. Any exception
        raised while reading or parsing is recorded as a string in the
        result's `errors` instead of propagating; items collected before
        the exception are kept.
        """
        nodes, edges, errors = [], [], []
        try:
            payload = file_path.read_bytes() if content is None else content
            for item in self.parse(file_path, payload):
                if isinstance(item, Node):
                    nodes.append(item)
                    continue
                if isinstance(item, Edge):
                    edges.append(item)
                # Items that are neither Node nor Edge are dropped.
        except Exception as exc:
            errors.append(str(exc))

        return ParseResult(
            file_path=file_path,
            file_hash="",  # Hash is usually computed by the engine
            nodes=nodes,
            edges=edges,
            errors=errors,
        )

    def _relativize(self, path: Path) -> str:
        """Return `path` relative to the scan root, or unchanged if it lies outside it."""
        try:
            relative = path.relative_to(self.context.root_dir)
        except ValueError:
            return str(path)
        return str(relative)
Attributes
extensions
property
List of file extensions supported by this parser.
name
abstractmethod
property
The unique name of the language (e.g., 'python').
Functions
can_parse(file_path, content=None)
abstractmethod
Determine if the file can be parsed.
Parameters:
| Name | Type | Description | Default |
| --- | --- | --- | --- |
| file_path | Path | The path to the file. | required |
| content | bytes \| None | Optional file content for heuristic detection. | None |
Source code in src/jnkn/parsing/base.py
@abstractmethod
def can_parse(self, file_path: Path, content: bytes | None = None) -> bool:
    """
    Decide whether this parser can handle the file.

    Args:
        file_path: The path to the file.
        content: Optional file content for heuristic detection.
    """
parse(file_path, content)
abstractmethod
Parse the file and return a list of Nodes and Edges.
Source code in src/jnkn/parsing/base.py
@abstractmethod
def parse(self, file_path: Path, content: bytes) -> List[Union[Node, Edge]]:
    """Parse the file and return its Nodes and Edges as one list."""
parse_full(file_path, content=None)
Parse a file and wrap the output in a standardized ParseResult.
Handles exceptions and file reading if content is not provided.
Source code in src/jnkn/parsing/base.py
def parse_full(self, file_path: Path, content: bytes | None = None) -> ParseResult:
    """
    Run `parse` and package the outcome as a ParseResult.

    The file is read from disk when `content` is None. Any exception
    raised while reading or parsing is recorded as a string in the
    result's `errors` instead of propagating; items collected before
    the exception are kept.
    """
    nodes, edges, errors = [], [], []
    try:
        payload = file_path.read_bytes() if content is None else content
        for item in self.parse(file_path, payload):
            if isinstance(item, Node):
                nodes.append(item)
                continue
            if isinstance(item, Edge):
                edges.append(item)
            # Items that are neither Node nor Edge are dropped.
    except Exception as exc:
        errors.append(str(exc))

    return ParseResult(
        file_path=file_path,
        file_hash="",  # Hash is usually computed by the engine
        nodes=nodes,
        edges=edges,
        errors=errors,
    )
ParseError
dataclass
Represents a non-fatal error encountered during parsing.
Attributes:
| Name | Type | Description |
| --- | --- | --- |
| file_path | str | The file where the error occurred. |
| message | str | Description of the error. |
| error_type | str | Category of error (e.g., 'syntax', 'encoding'). |
| recoverable | bool | Whether parsing continued despite the error. |
Source code in src/jnkn/parsing/base.py
@dataclass
class ParseError:
    """
    Record of a non-fatal problem hit while parsing.

    Attributes:
        file_path (str): The file where the error occurred.
        message (str): Description of the error.
        error_type (str): Category of error (e.g., 'syntax', 'encoding');
            defaults to "general".
        recoverable (bool): Whether parsing continued despite the error;
            defaults to True.
    """

    file_path: str
    message: str
    error_type: str = "general"
    recoverable: bool = True
ParseResult
dataclass
The standardized result object returned by all parsers.
Encapsulates the nodes and edges extracted from a file, along with
metadata and any errors encountered during the process.
Source code in src/jnkn/parsing/base.py
@dataclass
class ParseResult:
    """
    Uniform result object returned by all parsers.

    Bundles the nodes and edges extracted from one file with metadata and
    whatever errors were recorded along the way.
    """

    file_path: Path
    file_hash: str
    nodes: List[Node] = field(default_factory=list)
    edges: List[Edge] = field(default_factory=list)
    # Plain-string error messages.
    errors: List[str] = field(default_factory=list)
    # Structured error records.
    parse_errors: List[ParseError] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Overridden to False in __post_init__ when any error is present.
    success: bool = True
    capabilities_used: List[str] = field(default_factory=list)

    def __post_init__(self):
        """Force `success` to False when either error list is non-empty."""
        if not (self.errors or self.parse_errors):
            return
        self.success = False
Functions
__post_init__()
Automatically set success=False if errors are present.
Source code in src/jnkn/parsing/base.py
| def __post_init__(self):
"""Automatically set success=False if errors are present."""
if self.errors or self.parse_errors:
self.success = False
|
ParserCapability
Enumeration of capabilities a parser can provide.
Source code in src/jnkn/parsing/base.py
class ParserCapability:
    """
    Names of the capabilities a parser can provide.

    This is a plain class used as a namespace: each capability is an
    ordinary string constant.
    """

    DEPENDENCIES = "dependencies"
    ENV_VARS = "env_vars"
    DATA_LINEAGE = "data_lineage"
    IMPORTS = "imports"
    DEFINITIONS = "definitions"
    CONFIGS = "configs"
    SECRETS = "secrets"
    OUTPUTS = "outputs"
ParserContext
Configuration context passed to parsers.
Attributes:
| Name | Type | Description |
| --- | --- | --- |
| root_dir | Path | The root directory of the scan. |
| encoding | str | Default file encoding to use. |
Source code in src/jnkn/parsing/base.py
class ParserContext:
    """
    Configuration context passed to parsers.

    Attributes:
        root_dir (Path): The root directory of the scan.
        encoding (str): Default file encoding to use.
    """

    def __init__(self, root_dir: Path | None = None, *, encoding: str = "utf-8"):
        """
        Build a context for one scan.

        Args:
            root_dir: Root directory of the scan; the current working
                directory is used when this is omitted.
            encoding: Default file encoding. Keyword-only; defaults to
                "utf-8", the value that used to be hard-coded.
        """
        self.root_dir = root_dir or Path.cwd()
        self.encoding = encoding