由一道CTF题出发的python对象学习与分析

前几天参加了NepCTF，里面有一道题卡住我很久，也成功勾起我对深入学习python的兴趣

先来看一下这道题吧：

safebank

源码在这里：

from flask import Flask, request, make_response, render_template, redirect, url_for
import jsonpickle
import base64
import json
import os
import time

app = Flask(__name__)
app.secret_key = os.urandom(24)


class Account:
    """A user record: a login name together with its password."""

    def __init__(self, uid, pwd):
        # Both values are kept exactly as supplied by the caller.
        self.uid, self.pwd = uid, pwd


class Session:
    """Container for the session metadata carried in the auth token."""

    def __init__(self, meta):
        # The metadata object is stored by reference, not copied.
        self.meta = meta


# In-memory account store, rebuilt every time the module is loaded.
# "admin" receives a fresh random password (16 random bytes, hex-encoded),
# while "guest" uses the fixed password "guest".
users_db = [
    Account("admin", os.urandom(16).hex()),
    Account("guest", "guest")
]


def register_user(username, password):
    """Append a new account to ``users_db`` unless the name is taken.

    Returns ``True`` when the account was added, ``False`` when an existing
    account already uses ``username``.
    """
    name_taken = any(existing.uid == username for existing in users_db)
    if name_taken:
        return False
    users_db.append(Account(username, password))
    return True


# Blacklist of substrings that waf() rejects. Matching is a plain,
# case-sensitive ``in`` test against the re-serialized JSON text, so short
# entries such as 're' or 'os' also match inside longer words.
# NOTE: this is a module-level *mutable* list; anything able to reach it at
# runtime can modify or empty it.
FORBIDDEN = [
    'builtins', 'os', 'system', 'repr', '__class__', 'subprocess', 'popen', 'Popen', 'nt',
    'code', 'reduce', 'compile', 'command', 'pty', 'platform', 'pdb', 'pickle', 'marshal',
    'socket', 'threading', 'multiprocessing', 'signal', 'traceback', 'inspect', '\\\\', 'posix',
    'render_template', 'jsonpickle', 'cgi', 'execfile', 'importlib', 'sys', 'shutil', 'state',
    'import', 'ctypes', 'timeit', 'input', 'open', 'codecs', 'base64', 'jinja2', 're', 'json',
    'file', 'write', 'read', 'globals', 'locals', 'getattr', 'setattr', 'delattr', 'uuid',
    '__import__', '__globals__', '__code__', '__closure__', '__func__', '__self__', 'pydoc',
    '__module__', '__dict__', '__mro__', '__subclasses__', '__init__', '__new__'
]


def waf(serialized):
    """Screen a serialized token for blacklisted substrings.

    The input is parsed as JSON and re-serialized with ``ensure_ascii=False``
    before matching, so Unicode escape sequences in the raw text are compared
    as the literal characters they stand for.

    Args:
        serialized: JSON text to inspect.

    Returns:
        The first entry of ``FORBIDDEN`` (in list order) that occurs in the
        normalized text, ``None`` when no entry matches, or the string
        ``"error"`` when the input cannot be parsed or re-serialized.
    """
    try:
        payload = json.dumps(json.loads(serialized), ensure_ascii=False)
        return next((bad for bad in FORBIDDEN if bad in payload), None)
    except Exception:
        # Any failure is treated as a rejection ("error" is truthy for
        # callers). ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        return "error"


@app.route('/')
def root():
    """Serve the landing page."""
    landing_page = render_template('index.html')
    return landing_page


@app.route('/register', methods=['GET', 'POST'])
def register():
    """Show the sign-up form, or process it when it is submitted via POST.

    A submission is rejected, re-rendering the form with an error message,
    when a field is missing, the two passwords differ, the username is
    shorter than 4 characters, the password is shorter than 6 characters,
    or the username is already registered. On success the index page is
    rendered with a confirmation message.
    """
    # Anything that is not a POST simply gets the blank form.
    if request.method != 'POST':
        return render_template('register.html')

    username = request.form.get('username')
    password = request.form.get('password')
    confirm_password = request.form.get('confirm_password')

    def reject(reason):
        # Re-render the form together with the given error text.
        return render_template('register.html', error=reason)

    if not all((username, password, confirm_password)):
        return reject("所有字段都是必填的。")
    if password != confirm_password:
        return reject("密码不匹配。")
    if len(username) < 4 or len(password) < 6:
        return reject("用户名至少需要4个字符，密码至少需要6个字符。")
    if not register_user(username, password):
        return reject("用户名已存在。")

    return render_template('index.html', message="注册成功！请登录。")


@app.post('/auth')
def auth():
    """Check the submitted credentials and issue the ``authz`` cookie.

    On a match, a ``Session`` carrying the username and the current Unix
    timestamp is serialized with jsonpickle, base64-encoded, and set as the
    ``authz`` cookie on a 302 response that points to ``/panel``. Otherwise
    the index page is rendered with a login-failure message.
    """
    u = request.form.get("u")
    p = request.form.get("p")

    credentials_ok = any(acc.uid == u and acc.pwd == p for acc in users_db)
    if not credentials_ok:
        return render_template('index.html', error="登录失败。用户名或密码无效。")

    session = Session({'user': u, 'ts': int(time.time())})
    token_bytes = jsonpickle.encode(session).encode()
    cookie_value = base64.b64encode(token_bytes).decode()

    resp = make_response("登录成功。")
    resp.set_cookie("authz", cookie_value)
    resp.status_code = 302
    resp.headers['Location'] = '/panel'
    return resp


@app.route('/panel')
def panel():
    """Render the user or admin panel based on the ``authz`` cookie.

    Flow: redirect to the index when the cookie is missing; base64-decode
    it; run the decoded text through ``waf``; deserialize it with
    jsonpickle; then pick the template from ``meta["user"]``.

    Returns:
        A redirect, an error page, the user panel, or the admin panel.
    """
    token = request.cookies.get("authz")
    if not token:
        return redirect(url_for('root', error="缺少Token。"))

    try:
        decoded = base64.b64decode(token.encode()).decode()
    except Exception:
        # Malformed base64 or non-UTF-8 payload. ``Exception`` instead of a
        # bare ``except`` so KeyboardInterrupt / SystemExit are not swallowed.
        return render_template('error.html', error="Token格式错误。")

    ban = waf(decoded)
    if ban:
        # waf() returns the matched blacklist entry (or "error"); it is
        # echoed back in the error message.
        return render_template('error.html', error=f"请不要黑客攻击！{ban}")

    try:
        # NOTE(security): the cookie is client-controlled and carries no
        # signature, and jsonpickle.decode() can resolve and call arbitrary
        # importable objects. The substring blacklist above is the only
        # barrier; jsonpickle's documentation advises against decoding
        # untrusted data at all.
        sess_obj = jsonpickle.decode(decoded, safe=True)
        meta = sess_obj.meta

        if meta.get("user") != "admin":
            return render_template('user_panel.html', username=meta.get('user'))

        return render_template('admin_panel.html')
    except Exception:
        return render_template('error.html', error="数据解码失败。")


@app.route('/vault')
def vault():
    """Show the flag page to a session whose ``meta["user"]`` is ``"admin"``.

    A missing cookie, or any exception while decoding or deserializing the
    token, redirects to the index page. A token rejected by ``waf`` or one
    belonging to a non-admin user gets an error page.
    """
    token = request.cookies.get("authz")
    if not token:
        return redirect(url_for('root'))

    try:
        decoded = base64.b64decode(token.encode()).decode()
        if waf(decoded):
            return render_template('error.html', error="请不要尝试黑客攻击！")
        # NOTE(security): the cookie is client-controlled and unsigned;
        # jsonpickle.decode() on such data can resolve and call arbitrary
        # importable objects. The blacklist in waf() is the only barrier.
        sess_obj = jsonpickle.decode(decoded, safe=True)
        meta = sess_obj.meta

        if meta.get("user") != "admin":
            return render_template('error.html', error="访问被拒绝。只有管理员才能查看此页面。")

        flag = "NepCTF{fake_flag_this_is_not_the_real_one}"
        return render_template('vault.html', flag=flag)
    except Exception:
        # Best-effort fallback kept from the original; ``Exception`` instead
        # of a bare ``except`` so KeyboardInterrupt / SystemExit propagate.
        return redirect(url_for('root'))


@app.route('/about')
def about():
    """Serve the static "about" page."""
    about_page = render_template('about.html')
    return about_page


# Start Flask's built-in server only when this file is executed as a script:
# listen on every IPv4 interface, port 8000, with debug mode turned off.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8000, debug=False)

这道题的主要考点在于要通过jsonpickle的反序列化进行RCE，而黑名单的过滤非常严格，很多方法都利用不了

常规解法是从“jsonpickle在处理exception时会按eval来处理”这一点入手的

引起我注意的是拉蒙特徐（LamentXU）大佬的非预期解法

他注意到，既然能够直接访问全局空间的变量，那就可以直接访问黑名单，使用list.clear()，直接将黑名单给扬了

这是他给出的payload：

{"py/object": "__main__.Session", "meta": {"user": {"py/object":"__main__.FORBIDDEN.clear","py/newargs": []},"ts":1753446254}}

这个非预期解法的精妙之处，在于它不同于以往对“攻击”的认知。它不是通过绕过WAF，而是利用系统本身的合法机制来让WAF自己“破防”。

在了解这一解法之后，我就开始思考，为什么能够这样

1. Python 对象模型的统一性

在 Python 中，类、函数、方法、模块都是对象，都遵循相同的对象模型。这种统一性意味着：

所有对象都可以通过相同的方式被引用和操作
方法对象和普通对象在引用机制上没有本质区别
对象构造过程对所有类型都适用

payload的关键部分：

{
  "py/object": "__main__.FORBIDDEN.clear",
  "py/newargs": []
}

这里jsonpickle会尝试：

找到__main__.FORBIDDEN.clear这个对象
使用py/newargs中的参数来“构造”它

2. 方法对象的特殊性

在Python中，FORBIDDEN.clear是一个绑定方法(bound method)：

它关联了FORBIDDEN列表实例和clear方法
当jsonpickle尝试“构造”这个方法对象时，实际上会调用这个方法

3. 反序列化过程的执行流

# Pseudocode for the logic jsonpickle follows while deserializing
def decode_object(obj_dict):
    """Illustrate how a ``py/object`` entry ends up calling its target.

    This is a simplified sketch for the article, not jsonpickle's actual
    implementation; ``import_and_get`` stands in for the library's lookup of
    a dotted path.
    """
    class_path = obj_dict["py/object"]  # "__main__.FORBIDDEN.clear"
    args = obj_dict.get("py/newargs", [])  # []

    # Resolve the target object from its dotted path
    target_obj = import_and_get(class_path)

    # If the target is callable and constructor arguments are present, call it
    if callable(target_obj) and args is not None:
        return target_obj(*args)  # this is where FORBIDDEN.clear() actually runs
    else:
        return target_obj

这个解法是真正触及到python“一切皆对象”的思想的

一切皆可引用

Python中的对象引用系统非常统一：

# Variable reference
x = FORBIDDEN

# Attribute reference
clear_method = FORBIDDEN.clear

# Module reference: the name ``__main__`` only exists once the module has
# been imported, otherwise the assignment below raises NameError
import __main__
main_module = __main__

# Objects can even be reached dynamically, through strings
import importlib
module_obj = importlib.import_module('__main__')
forbidden_obj = getattr(module_obj, 'FORBIDDEN')
clear_method = getattr(forbidden_obj, 'clear')

方法调用的本质

# These two calls are equivalent
FORBIDDEN.clear()
list.clear(FORBIDDEN)

# because FORBIDDEN.clear is effectively a partially applied function
bound_method = FORBIDDEN.clear
bound_method()  # FORBIDDEN is passed automatically as self

这道题也成功激起了我学习python源码的兴趣，不过我python源码的学习笔记放语雀了（欸嘿）