Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: detect typos in project names during upload #17649

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def get_app_config(database, nondefaults=None):
"billing.api_version": "2020-08-27",
"mail.backend": "warehouse.email.services.SMTPEmailSender",
"helpdesk.backend": "warehouse.helpdesk.services.ConsoleHelpDeskService",
"helpdesk.notification_backend": "warehouse.helpdesk.services.ConsoleHelpDeskService", # noqa: E501
"helpdesk.notification_backend": "warehouse.helpdesk.services.ConsoleAdminNotificationService", # noqa: E501
"files.url": "http://localhost:7000/",
"archive_files.url": "http://localhost:7000/archive",
"sessions.secret": "123456",
Expand Down
67 changes: 67 additions & 0 deletions tests/functional/forklift/test_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,73 @@ def test_duplicate_file_upload_error(webtest):
assert "File already exists" in resp.body.decode()


def test_typo_check_name_upload_passes(webtest, monkeypatch):
    """
    Upload a release whose project name is a near-miss of a corpus entry and
    confirm the upload is accepted rather than blocked.

    The top-project corpus is patched to contain ``wutang`` while the uploaded
    project is named ``wutamg``. The request must return 200 and leave the
    user with exactly one project holding exactly one release.

    NOTE: the upload is also expected to notify the admins, but nothing in
    this test asserts that.
    """
    # TODO: Replace with a better way to generate corpus
    monkeypatch.setattr(
        "warehouse.packaging.typosnyper._TOP_PROJECT_NAMES",
        {"wutang", "requests"},
    )

    # The uploading user, with a verified primary email.
    user = UserFactory.create(with_verified_primary_email=True, clear_pwd="password")

    # Stored macaroon record, restricted to this user via a RequestUser caveat.
    db_macaroon = MacaroonFactory.create(
        user_id=user.id,
        caveats=[caveats.RequestUser(user_id=str(user.id))],
    )

    # Rebuild the wire-format macaroon from the stored record and its caveats.
    m = pymacaroons.Macaroon(
        location="localhost",
        identifier=str(db_macaroon.id),
        key=db_macaroon.key,
        version=pymacaroons.MACAROON_V2,
    )
    for caveat in db_macaroon.caveats:
        m.add_first_party_caveat(caveats.serialize(caveat))

    # HTTP Basic credentials: the `__token__` user plus the prefixed macaroon.
    serialized_macaroon = f"pypi-{m.serialize()}"
    raw_credentials = f"__token__:{serialized_macaroon}".encode()
    credentials = base64.b64encode(raw_credentials).decode("utf-8")

    # Any sdist will do as the payload; its filename/metadata doesn't matter here.
    with open(
        "./tests/functional/_fixtures/sampleproject-3.0.0.tar.gz", "rb"
    ) as fixture:
        archive_bytes = fixture.read()

    # Form fields for the legacy upload endpoint.
    form_fields = MultiDict(
        {
            ":action": "file_upload",
            "protocol_version": "1",
            "name": "wutamg",  # Here is the typo
            "sha256_digest": (
                "117ed88e5db073bb92969a7545745fd977ee85b7019706dd256a64058f70963d"
            ),
            "filetype": "sdist",
            "metadata_version": "2.1",
            "version": "3.0.0",
        }
    )
    auth_headers = {"Authorization": f"Basic {credentials}"}
    attachments = [("content", "wutamg-3.0.0.tar.gz", archive_bytes)]  # and here

    webtest.post(
        "/legacy/",
        headers=auth_headers,
        params=form_fields,
        upload_files=attachments,
        status=HTTPStatus.OK,
    )

    # Exactly one project was created, under the typo'd name.
    assert user.projects
    assert len(user.projects) == 1
    project = user.projects[0]
    assert project.name == "wutamg"  # confirming it passed

    # ...and it carries exactly the uploaded release.
    assert project.releases
    assert len(project.releases) == 1
    release = project.releases[0]
    assert release.version == "3.0.0"


def test_invalid_classifier_upload_error(webtest):
user = UserFactory.create(with_verified_primary_email=True, clear_pwd="password")

Expand Down
9 changes: 9 additions & 0 deletions tests/unit/packaging/test_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
ProjectNameUnavailableProhibitedError,
ProjectNameUnavailableSimilarError,
ProjectNameUnavailableStdlibError,
ProjectNameUnavailableTypoSquattingError,
)
from warehouse.packaging.services import (
B2FileStorage,
Expand Down Expand Up @@ -1050,6 +1051,14 @@ def test_check_project_name_too_similar_multiple_existing(self, db_session):
or exc.value.similar_project_name == project2.name
)

def test_check_project_name_typosquatting_prohibited(self, db_session):
    """
    With ``numpy`` registered as a prohibited project, checking the name
    ``numpi`` must raise ``ProjectNameUnavailableTypoSquattingError``.
    """
    # TODO: Update this test once we have a dynamic TopN approach
    project_service = ProjectService(session=db_session)
    ProhibitedProjectFactory.create(name="numpy")

    with pytest.raises(ProjectNameUnavailableTypoSquattingError):
        project_service.check_project_name("numpi")

def test_check_project_name_ok(self, db_session):
service = ProjectService(session=db_session)

Expand Down
49 changes: 49 additions & 0 deletions tests/unit/packaging/test_typosnyper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest

from warehouse.packaging.typosnyper import typo_check_name


@pytest.mark.parametrize(
    ("name", "expected"),
    [
        # Names that match a corpus entry produce no finding.
        ("numpy", None),  # Pass, no typos, exists
        ("NuMpy", None),  # Pass, same as `numpy` after canonicalization
        # Every other case expects a `(check kind, matched corpus name)` pair.
        ("nuumpy", ("repeated_characters", "numpy")),
        ("reequests", ("repeated_characters", "requests")),
        ("sphnx", ("omitted_characters", "sphinx")),
        ("python-dteutil", ("omitted_characters", "python-dateutil")),
        ("pythondateutil", ("omitted_characters", "python-dateutil")),
        ("jinj2a", ("swapped_characters", "jinja2")),
        ("dateutil-python", ("swapped_words", "python-dateutil")),
        ("numpi", ("common_typos", "numpy")),
        ("requestz", ("common_typos", "requests")),
    ],
)
def test_typo_check_name(name, expected, monkeypatch):
    """
    Check ``typo_check_name`` against a small, fixed corpus of project names.

    ``expected`` is either ``None`` or the tuple the function should return
    for ``name``.
    """
    # Pin the corpus so results don't depend on the real _TOP_PROJECT_NAMES.
    # TODO: Replace with a better way to generate corpus
    known_projects = {
        "numpy",
        "requests",
        "sphinx",
        "beautifulsoup4",
        "jinja2",
        "python-dateutil",
    }
    monkeypatch.setattr(
        "warehouse.packaging.typosnyper._TOP_PROJECT_NAMES",
        known_projects,
    )

    outcome = typo_check_name(name)

    assert outcome == expected
20 changes: 10 additions & 10 deletions warehouse/locale/messages.pot
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ msgstr ""
msgid "Select project"
msgstr ""

#: warehouse/manage/forms.py:506 warehouse/oidc/forms/_core.py:29
#: warehouse/manage/forms.py:506 warehouse/oidc/forms/_core.py:33
#: warehouse/oidc/forms/gitlab.py:57
msgid "Specify project name"
msgstr ""
Expand Down Expand Up @@ -658,45 +658,45 @@ msgstr ""
msgid "Expired invitation for '${username}' deleted."
msgstr ""

#: warehouse/oidc/forms/_core.py:31 warehouse/oidc/forms/_core.py:42
#: warehouse/oidc/forms/_core.py:35 warehouse/oidc/forms/_core.py:46
#: warehouse/oidc/forms/gitlab.py:60 warehouse/oidc/forms/gitlab.py:64
msgid "Invalid project name"
msgstr ""

#: warehouse/oidc/forms/_core.py:60
#: warehouse/oidc/forms/_core.py:64
#, python-brace-format
msgid ""
"This project already exists: use the project's publishing settings <a "
"href='${url}'>here</a> to create a Trusted Publisher for it."
msgstr ""

#: warehouse/oidc/forms/_core.py:69
#: warehouse/oidc/forms/_core.py:73
msgid "This project already exists."
msgstr ""

#: warehouse/oidc/forms/_core.py:74
#: warehouse/oidc/forms/_core.py:78
msgid "This project name isn't allowed"
msgstr ""

#: warehouse/oidc/forms/_core.py:78
#: warehouse/oidc/forms/_core.py:82
msgid "This project name is too similar to an existing project"
msgstr ""

#: warehouse/oidc/forms/_core.py:83
#: warehouse/oidc/forms/_core.py:87
msgid ""
"This project name isn't allowed (conflict with the Python standard "
"library module name)"
msgstr ""

#: warehouse/oidc/forms/_core.py:99 warehouse/oidc/forms/_core.py:110
#: warehouse/oidc/forms/_core.py:115 warehouse/oidc/forms/_core.py:126
msgid "Specify a publisher ID"
msgstr ""

#: warehouse/oidc/forms/_core.py:100 warehouse/oidc/forms/_core.py:111
#: warehouse/oidc/forms/_core.py:116 warehouse/oidc/forms/_core.py:127
msgid "Publisher must be specified by ID"
msgstr ""

#: warehouse/oidc/forms/_core.py:116
#: warehouse/oidc/forms/_core.py:132
msgid "Specify an environment name"
msgstr ""

Expand Down
16 changes: 16 additions & 0 deletions warehouse/oidc/forms/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import markupsafe
import structlog
import wtforms

from warehouse.i18n import localize as _
Expand All @@ -19,9 +20,12 @@
ProjectNameUnavailableProhibitedError,
ProjectNameUnavailableSimilarError,
ProjectNameUnavailableStdlibError,
ProjectNameUnavailableTypoSquattingError,
)
from warehouse.utils.project import PROJECT_NAME_RE

log = structlog.get_logger()


class PendingPublisherMixin:
project_name = wtforms.StringField(
Expand Down Expand Up @@ -84,6 +88,18 @@ def validate_project_name(self, field):
" standard library module name)"
)
)
# TODO: Cover with testing and remove pragma
except ProjectNameUnavailableTypoSquattingError as exc: # pragma: no cover
# TODO: raise with an appropriate message when we're ready to implement
# or combine with `ProjectNameUnavailableSimilarError`
# TODO: This is an attempt at using structlog, since `request.log` isn't in scope.
# We should be able to use `log` instead, but it doesn't have the same output
log.error(
"Typo-squatting error raised but not handled in form validation",
check_name=exc.check_name,
existing_project_name=exc.existing_project_name,
)
pass

@property
def provider(self) -> str: # pragma: no cover
Expand Down
8 changes: 8 additions & 0 deletions warehouse/packaging/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,11 @@ class ProjectNameUnavailableSimilarError(ProjectNameUnavailableError):

def __init__(self, similar_project_name: str):
self.similar_project_name: str = similar_project_name


class ProjectNameUnavailableTypoSquattingError(ProjectNameUnavailableError):
    """
    Project name is a typo of an existing project.

    Both the name that was checked and the existing project name are kept on
    the exception so that handlers can read them back.
    """

    def __init__(self, check_name: str, existing_project_name: str):
        # The existing project name passed in by the raiser.
        self.existing_project_name: str = existing_project_name
        # The name that was being checked.
        self.check_name: str = check_name
Loading