Skip to content

Commit

Permalink
feat: comments, boolean and delimiters regex
Browse files Browse the repository at this point in the history
  • Loading branch information
danielogen committed Aug 12, 2024
1 parent 00876bd commit b2fcbe9
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 12 deletions.
10 changes: 5 additions & 5 deletions src/PyReprism/languages/cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def keywords() -> list:
return keyword

@staticmethod
def comment_regex():
def comment_regex() -> re.Pattern:
"""
Compile and return a regular expression pattern to identify different types of comments and non-comment code in C++ source files.
Expand All @@ -39,7 +39,7 @@ def comment_regex():
return pattern

@staticmethod
def number_regex():
def number_regex() -> re.Pattern:
"""
Compile and return a regular expression pattern to identify numeric literals in C++ code.
Expand All @@ -50,7 +50,7 @@ def number_regex():
return pattern

@staticmethod
def operator_regex():
def operator_regex() -> re.Pattern:
"""
Compile and return a regular expression pattern to identify C++ operators.
Expand All @@ -61,7 +61,7 @@ def operator_regex():
return pattern

@staticmethod
def keywords_regex():
def keywords_regex() -> re.Pattern:
"""
Compile and return a regular expression pattern that matches C++ keywords and built-in functions.
Expand Down Expand Up @@ -102,7 +102,7 @@ def remove_comments(source_code: str) -> str:
return CPP.comment_regex().sub(lambda match: match.group('noncomment') if match.group('noncomment') else '', source_code).strip()

@staticmethod
def remove_keywords(source: str):
def remove_keywords(source: str) -> str:
"""
Remove all C++ keywords from the provided source code string.
Expand Down
3 changes: 3 additions & 0 deletions src/PyReprism/languages/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@


class Python:
"""
This is the class for processing Python source code
"""
def __init__(self):
pass

Expand Down
42 changes: 37 additions & 5 deletions src/PyReprism/languages/scala.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,58 @@ def keywords() -> list:
return keyword

@staticmethod
def comment_regex() -> re.Pattern:
    """
    Compile and return the regular expression used to split Scala source into comment and non-comment pieces.
    The pattern defines two named groups: ``comment`` (``//`` line comments, ``/* ... */`` block comments, block-comment fragments left open or closed on a line, and runs of braces) and ``noncomment`` (single- or double-quoted literals and any other text).
    :return: A pattern compiled with ``re.DOTALL`` and ``re.MULTILINE``.
    :rtype: re.Pattern
    """
    flags = re.DOTALL | re.MULTILINE
    return re.compile(r'(?P<comment>//.*?$|/\*.*?\*/|/\*.*?$|^.*?\*/|[{}]+)|(?P<noncomment>\'(\\.|[^\\\'])*\'|"(\\.|[^\\"])*"|.[^/\'"{}]*)', flags)

@staticmethod
def number_regex() -> re.Pattern:
    """
    Compile and return the regular expression used to find numeric literals in Scala code.
    The pattern accepts ``0x``-prefixed literals as well as decimal literals with an optional fraction, an optional ``e<digits>`` exponent and an optional ``d``/``f``/``l`` suffix.
    :return: A compiled regex pattern to match numeric literals.
    :rtype: re.Pattern
    """
    return re.compile(r'\b0x[\da-f]*\.?[\da-f]+|(?:\b\d+\.?\d*|\B\.\d+)(?:e\d+)?[dfl]?')

@staticmethod
def operator_regex() -> re.Pattern:
    """
    Compile and return the regular expression used to find operators in Scala code.
    Each match starts with capture group 1, which is either the start of the string or one character other than ``.``, followed by the operator itself.
    :return: A compiled regex pattern to match operators.
    :rtype: re.Pattern
    """
    return re.compile(r'(^|[^.])(?:\+[+=]?|-[-=]?|!=?|<<?=?|>>?>?=?|==?|&[&=]?|\|[|=]?|\*=?|\/=?|%=?|\^=?|[?:~])')

@staticmethod
def keywords_regex() -> re.Pattern:
    """
    Compile and return a regular expression that matches any entry of ``Scala.keywords()`` as a whole word.
    The entries are joined with ``|`` inside one capturing group bounded by ``\\b`` on both sides.
    :return: A compiled regex pattern to match Scala keywords.
    :rtype: re.Pattern
    """
    alternation = '|'.join(Scala.keywords())
    return re.compile(r'\b(' + alternation + r')\b')

@staticmethod
def boolean_regex() -> re.Pattern:
    """
    Compile and return the regular expression used to find Scala boolean literals.
    The pattern matches the lowercase words `true` and `false`, plus the `null` constant, only when they stand alone as whole words.
    :return: A compiled regex pattern to match `true`, `false` and `null`.
    :rtype: re.Pattern
    """
    pattern = re.compile(r'\b(?:true|false|null)\b')
    return pattern

@staticmethod
def delimiters_regex() -> re.Pattern:
    """
    Compile and return the regular expression used to find Scala delimiters.
    Each match is a single character from this set: parentheses `()`, brackets `[]`, braces `{}`, period `.`, comma `,`, colon `:`, semicolon `;`, angle brackets `<` `>`, question mark `?` and underscore `_`.
    :return: A compiled regex pattern to match one delimiter character.
    :rtype: re.Pattern
    """
    pattern = re.compile(r'[()\[\]{}.,:;<>?_]')
    return pattern

@staticmethod
def remove_comments(source_code: str, isList: bool = False) -> str:
"""
Remove comments from the provided Scala source code string.
:param str source_code: The Scala source code from which to remove comments.
:return: The source code with all comments removed.
:rtype: str
"""
return Scala.comment_regex().sub(lambda match: match.group('noncomment') if match.group('noncomment') else '', source_code).strip()
result = []
for match in Scala.comment_regex().finditer(source_code):
if match.group('noncomment'):
Expand All @@ -45,5 +77,5 @@ def remove_comments(source_code: str, isList: bool = False) -> str:
return ''.join(result)

@staticmethod
def remove_keywords(source: str) -> str:
    """
    Remove all Scala keywords from the provided source code string.
    Every match of ``Scala.keywords_regex()`` is replaced with an empty string; surrounding whitespace is left untouched.
    :param str source: The source code string from which to remove Scala keywords.
    :return: The source code string with all Scala keywords removed.
    :rtype: str
    """
    # keywords_regex() already returns a compiled pattern, so call sub() on it
    # directly instead of passing it through re.compile() a second time.
    return Scala.keywords_regex().sub('', source)
4 changes: 2 additions & 2 deletions src/PyReprism/utils/normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ def __init__():
pass

@staticmethod
def whitespaces_regex() -> re.Pattern:
    """
    Compile and return the regular expression used to find removable whitespace.
    The pattern matches runs of tabs, vertical tabs, form feeds, carriage returns and spaces, as well as lines that contain only whitespace (including their trailing newline).
    :return: A pattern compiled with ``re.MULTILINE``.
    :rtype: re.Pattern
    """
    pattern = re.compile(r'[\t\x0b\x0c\r ]+|^\s*\n', re.MULTILINE)
    return pattern

@staticmethod
def remove_whitespaces(source: str) -> str:
    """
    Remove whitespace from the provided source string.
    Every match of ``Normalizer.whitespaces_regex()`` is deleted, then leading and trailing whitespace is stripped from the result.
    :param str source: The text to clean up.
    :return: The text with matched whitespace removed.
    :rtype: str
    """
    cleaned = Normalizer.whitespaces_regex().sub('', source)
    return cleaned.strip()

0 comments on commit b2fcbe9

Please sign in to comment.