Skip to content

Commit dd42961

Browse files
[pylint] Detect pathlib.Path.open calls in unspecified-encoding (PLW1514) (#11288)
<!--
Thank you for contributing to Ruff! To help us out with reviewing, please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

Resolves #11263

Detect `pathlib.Path.open` calls which do not specify a file encoding.

## Test Plan

Test cases added to fixture.

---------

Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
1 parent c80c171 commit dd42961

4 files changed

+322
-49
lines changed

crates/ruff_linter/resources/test/fixtures/pylint/unspecified_encoding.py

+25
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,28 @@ def func(*args, **kwargs):
6969
(("test.txt")),
7070
# comment
7171
)
72+
73+
# pathlib
74+
from pathlib import Path
75+
76+
# Errors.
77+
Path("foo.txt").open()
78+
Path("foo.txt").open("w")
79+
text = Path("foo.txt").read_text()
80+
Path("foo.txt").write_text(text)
81+
82+
# Non-errors.
83+
Path("foo.txt").open(encoding="utf-8")
84+
Path("foo.txt").open("wb")
85+
Path("foo.txt").open(*args)
86+
Path("foo.txt").open(**kwargs)
87+
text = Path("foo.txt").read_text(encoding="utf-8")
88+
text = Path("foo.txt").read_text(*args)
89+
text = Path("foo.txt").read_text(**kwargs)
90+
Path("foo.txt").write_text(text, encoding="utf-8")
91+
Path("foo.txt").write_text(text, *args)
92+
Path("foo.txt").write_text(text, **kwargs)
93+
94+
# Violation but not detectable
95+
x = Path("foo.txt")
96+
x.open()

crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs

+117-49
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1+
use std::fmt::{Display, Formatter};
2+
13
use anyhow::Result;
24

3-
use ast::StringLiteralFlags;
45
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Fix};
56
use ruff_macros::{derive_message_formats, violation};
6-
use ruff_python_ast as ast;
77
use ruff_python_ast::name::QualifiedName;
8-
use ruff_python_ast::Expr;
8+
use ruff_python_ast::{self as ast, Expr, StringLiteralFlags};
9+
use ruff_python_semantic::SemanticModel;
910
use ruff_text_size::{Ranged, TextRange};
1011

1112
use crate::checkers::ast::Checker;
@@ -43,7 +44,7 @@ use crate::settings::types::PythonVersion;
4344
#[violation]
4445
pub struct UnspecifiedEncoding {
4546
function_name: String,
46-
mode: Mode,
47+
mode: ModeArgument,
4748
}
4849

4950
impl AlwaysFixableViolation for UnspecifiedEncoding {
@@ -55,10 +56,10 @@ impl AlwaysFixableViolation for UnspecifiedEncoding {
5556
} = self;
5657

5758
match mode {
58-
Mode::Supported => {
59+
ModeArgument::Supported => {
5960
format!("`{function_name}` in text mode without explicit `encoding` argument")
6061
}
61-
Mode::Unsupported => {
62+
ModeArgument::Unsupported => {
6263
format!("`{function_name}` without explicit `encoding` argument")
6364
}
6465
}
@@ -71,11 +72,9 @@ impl AlwaysFixableViolation for UnspecifiedEncoding {
7172

7273
/// PLW1514
7374
pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall) {
74-
let Some((function_name, mode)) = checker
75-
.semantic()
76-
.resolve_qualified_name(&call.func)
77-
.filter(|qualified_name| is_violation(call, qualified_name))
78-
.map(|qualified_name| (qualified_name.to_string(), Mode::from(&qualified_name)))
75+
let Some((function_name, mode)) = Callee::try_from_call_expression(call, checker.semantic())
76+
.filter(|segments| is_violation(call, segments))
77+
.map(|segments| (segments.to_string(), segments.mode_argument()))
7978
else {
8079
return;
8180
};
@@ -97,6 +96,68 @@ pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall)
9796
checker.diagnostics.push(diagnostic);
9897
}
9998

99+
/// Represents the path of the function or method being called.
100+
enum Callee<'a> {
101+
/// Fully-qualified symbol name of the callee.
102+
Qualified(QualifiedName<'a>),
103+
/// Attribute value for the `pathlib.Path(...)` call e.g., `open` in
104+
/// `pathlib.Path(...).open(...)`.
105+
Pathlib(&'a str),
106+
}
107+
108+
impl<'a> Callee<'a> {
109+
fn try_from_call_expression(
110+
call: &'a ast::ExprCall,
111+
semantic: &'a SemanticModel,
112+
) -> Option<Self> {
113+
if let Expr::Attribute(ast::ExprAttribute { attr, value, .. }) = call.func.as_ref() {
114+
// Check for `pathlib.Path(...).open(...)` or equivalent
115+
if let Expr::Call(ast::ExprCall { func, .. }) = value.as_ref() {
116+
if semantic
117+
.resolve_qualified_name(func)
118+
.is_some_and(|qualified_name| {
119+
matches!(qualified_name.segments(), ["pathlib", "Path"])
120+
})
121+
{
122+
return Some(Callee::Pathlib(attr));
123+
}
124+
}
125+
}
126+
127+
if let Some(qualified_name) = semantic.resolve_qualified_name(&call.func) {
128+
return Some(Callee::Qualified(qualified_name));
129+
}
130+
131+
None
132+
}
133+
134+
fn mode_argument(&self) -> ModeArgument {
135+
match self {
136+
Callee::Qualified(qualified_name) => match qualified_name.segments() {
137+
["" | "codecs" | "_io", "open"] => ModeArgument::Supported,
138+
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
139+
ModeArgument::Supported
140+
}
141+
["io" | "_io", "TextIOWrapper"] => ModeArgument::Unsupported,
142+
_ => ModeArgument::Unsupported,
143+
},
144+
Callee::Pathlib(attr) => match *attr {
145+
"open" => ModeArgument::Supported,
146+
_ => ModeArgument::Unsupported,
147+
},
148+
}
149+
}
150+
}
151+
152+
impl Display for Callee<'_> {
153+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
154+
match self {
155+
Callee::Qualified(qualified_name) => f.write_str(&qualified_name.to_string()),
156+
Callee::Pathlib(attr) => f.write_str(&format!("pathlib.Path(...).{attr}")),
157+
}
158+
}
159+
}
160+
100161
/// Generate an [`Edit`] for Python 3.10 and later.
101162
fn generate_keyword_fix(checker: &Checker, call: &ast::ExprCall) -> Fix {
102163
Fix::unsafe_edit(add_argument(
@@ -146,7 +207,7 @@ fn is_binary_mode(expr: &Expr) -> Option<bool> {
146207
}
147208

148209
/// Returns `true` if the given call lacks an explicit `encoding`.
149-
fn is_violation(call: &ast::ExprCall, qualified_name: &QualifiedName) -> bool {
210+
fn is_violation(call: &ast::ExprCall, qualified_name: &Callee) -> bool {
150211
// If we have something like `*args`, which might contain the encoding argument, abort.
151212
if call.arguments.args.iter().any(Expr::is_starred_expr) {
152213
return false;
@@ -160,54 +221,61 @@ fn is_violation(call: &ast::ExprCall, qualified_name: &QualifiedName) -> bool {
160221
{
161222
return false;
162223
}
163-
match qualified_name.segments() {
164-
["" | "codecs" | "_io", "open"] => {
165-
if let Some(mode_arg) = call.arguments.find_argument("mode", 1) {
166-
if is_binary_mode(mode_arg).unwrap_or(true) {
167-
// binary mode or unknown mode is no violation
168-
return false;
224+
match qualified_name {
225+
Callee::Qualified(qualified_name) => match qualified_name.segments() {
226+
["" | "codecs" | "_io", "open"] => {
227+
if let Some(mode_arg) = call.arguments.find_argument("mode", 1) {
228+
if is_binary_mode(mode_arg).unwrap_or(true) {
229+
// binary mode or unknown mode is no violation
230+
return false;
231+
}
169232
}
233+
// else mode not specified, defaults to text mode
234+
call.arguments.find_argument("encoding", 3).is_none()
170235
}
171-
// else mode not specified, defaults to text mode
172-
call.arguments.find_argument("encoding", 3).is_none()
173-
}
174-
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
175-
let mode_pos = usize::from(qualified_name.segments()[1] == "SpooledTemporaryFile");
176-
if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) {
177-
if is_binary_mode(mode_arg).unwrap_or(true) {
178-
// binary mode or unknown mode is no violation
236+
["tempfile", tempfile_class @ ("TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile")] =>
237+
{
238+
let mode_pos = usize::from(*tempfile_class == "SpooledTemporaryFile");
239+
if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) {
240+
if is_binary_mode(mode_arg).unwrap_or(true) {
241+
// binary mode or unknown mode is no violation
242+
return false;
243+
}
244+
} else {
245+
// defaults to binary mode
179246
return false;
180247
}
181-
} else {
182-
// defaults to binary mode
183-
return false;
248+
call.arguments
249+
.find_argument("encoding", mode_pos + 2)
250+
.is_none()
184251
}
185-
call.arguments
186-
.find_argument("encoding", mode_pos + 2)
187-
.is_none()
188-
}
189-
["io" | "_io", "TextIOWrapper"] => call.arguments.find_argument("encoding", 1).is_none(),
190-
_ => false,
252+
["io" | "_io", "TextIOWrapper"] => {
253+
call.arguments.find_argument("encoding", 1).is_none()
254+
}
255+
_ => false,
256+
},
257+
Callee::Pathlib(attr) => match *attr {
258+
"open" => {
259+
if let Some(mode_arg) = call.arguments.find_argument("mode", 0) {
260+
if is_binary_mode(mode_arg).unwrap_or(true) {
261+
// binary mode or unknown mode is no violation
262+
return false;
263+
}
264+
}
265+
// else mode not specified, defaults to text mode
266+
call.arguments.find_argument("encoding", 2).is_none()
267+
}
268+
"read_text" => call.arguments.find_argument("encoding", 0).is_none(),
269+
"write_text" => call.arguments.find_argument("encoding", 1).is_none(),
270+
_ => false,
271+
},
191272
}
192273
}
193274

194275
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
195-
enum Mode {
276+
enum ModeArgument {
196277
/// The call supports a `mode` argument.
197278
Supported,
198279
/// The call does not support a `mode` argument.
199280
Unsupported,
200281
}
201-
202-
impl From<&QualifiedName<'_>> for Mode {
203-
fn from(value: &QualifiedName<'_>) -> Self {
204-
match value.segments() {
205-
["" | "codecs" | "_io", "open"] => Mode::Supported,
206-
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
207-
Mode::Supported
208-
}
209-
["io" | "_io", "TextIOWrapper"] => Mode::Unsupported,
210-
_ => Mode::Unsupported,
211-
}
212-
}
213-
}

crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLW1514_unspecified_encoding.py.snap

+81
Original file line numberDiff line numberDiff line change
@@ -352,5 +352,86 @@ unspecified_encoding.py:68:1: PLW1514 [*] `open` in text mode without explicit `
352352
69 |+ (("test.txt")), encoding="locale",
353353
70 70 | # comment
354354
71 71 | )
355+
72 72 |
355356

357+
unspecified_encoding.py:77:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument
358+
|
359+
76 | # Errors.
360+
77 | Path("foo.txt").open()
361+
| ^^^^^^^^^^^^^^^^^^^^ PLW1514
362+
78 | Path("foo.txt").open("w")
363+
79 | text = Path("foo.txt").read_text()
364+
|
365+
= help: Add explicit `encoding` argument
366+
367+
Unsafe fix
368+
74 74 | from pathlib import Path
369+
75 75 |
370+
76 76 | # Errors.
371+
77 |-Path("foo.txt").open()
372+
77 |+Path("foo.txt").open(encoding="locale")
373+
78 78 | Path("foo.txt").open("w")
374+
79 79 | text = Path("foo.txt").read_text()
375+
80 80 | Path("foo.txt").write_text(text)
376+
377+
unspecified_encoding.py:78:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument
378+
|
379+
76 | # Errors.
380+
77 | Path("foo.txt").open()
381+
78 | Path("foo.txt").open("w")
382+
| ^^^^^^^^^^^^^^^^^^^^ PLW1514
383+
79 | text = Path("foo.txt").read_text()
384+
80 | Path("foo.txt").write_text(text)
385+
|
386+
= help: Add explicit `encoding` argument
387+
388+
Unsafe fix
389+
75 75 |
390+
76 76 | # Errors.
391+
77 77 | Path("foo.txt").open()
392+
78 |-Path("foo.txt").open("w")
393+
78 |+Path("foo.txt").open("w", encoding="locale")
394+
79 79 | text = Path("foo.txt").read_text()
395+
80 80 | Path("foo.txt").write_text(text)
396+
81 81 |
397+
398+
unspecified_encoding.py:79:8: PLW1514 [*] `pathlib.Path(...).read_text` without explicit `encoding` argument
399+
|
400+
77 | Path("foo.txt").open()
401+
78 | Path("foo.txt").open("w")
402+
79 | text = Path("foo.txt").read_text()
403+
| ^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
404+
80 | Path("foo.txt").write_text(text)
405+
|
406+
= help: Add explicit `encoding` argument
407+
408+
Unsafe fix
409+
76 76 | # Errors.
410+
77 77 | Path("foo.txt").open()
411+
78 78 | Path("foo.txt").open("w")
412+
79 |-text = Path("foo.txt").read_text()
413+
79 |+text = Path("foo.txt").read_text(encoding="locale")
414+
80 80 | Path("foo.txt").write_text(text)
415+
81 81 |
416+
82 82 | # Non-errors.
417+
418+
unspecified_encoding.py:80:1: PLW1514 [*] `pathlib.Path(...).write_text` without explicit `encoding` argument
419+
|
420+
78 | Path("foo.txt").open("w")
421+
79 | text = Path("foo.txt").read_text()
422+
80 | Path("foo.txt").write_text(text)
423+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
424+
81 |
425+
82 | # Non-errors.
426+
|
427+
= help: Add explicit `encoding` argument
356428

429+
Unsafe fix
430+
77 77 | Path("foo.txt").open()
431+
78 78 | Path("foo.txt").open("w")
432+
79 79 | text = Path("foo.txt").read_text()
433+
80 |-Path("foo.txt").write_text(text)
434+
80 |+Path("foo.txt").write_text(text, encoding="locale")
435+
81 81 |
436+
82 82 | # Non-errors.
437+
83 83 | Path("foo.txt").open(encoding="utf-8")

0 commit comments

Comments
 (0)